R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. Html_fragmented is quicker to load than html, hence for this pipeline which genenerated a large mount of data, we have used html_fragment to report Rmarkdown.

Checklist for running this pipeline

#1) GEO series number from publication or seatching GEO for dataset of interest, here GSE=GSE33000 and GSE=GSE43490 #2) The GPL gene annotation file, here GPL=GPL4372 for GSE33000 and GPL=GPL6480 for GSE43490 #3) To reuse code for other studies replace the GEO and GPL numbers #4) metadata file made by manually combining individual metadata files from GEO to give ‘merge_GSE33000_GSE43490_metadata.csv’ #5) metadata coded file made manually from ‘merge_GSE33000_GSE43490_metadata_SampleID.csv’ outputed at end of merge code Step 12 to give ‘merge_GSE33000_GSE43490_metadata_SampleID_Disease_single_coded’ code used is Study GSE33000=1, GSE43490=2; Gender Female=1, Male=2 ; Disease CON=1, DIS=2 #6) run code till step 28 and then based on cell type and trait associated module replace in code “black” or “HuAgeDis_07” with module name that is associated with trait or disease of interest. And replace hub gene names. #7) WGCNA assigns colors randomly (except few reserved colors like “grey” for unassigned genes). Therefore, upon re-run of this analysis it may call the age and disease associated microglia enriched “black” module by some other color such as “pink”, in which case this color is specified step 29 onwards as described in point 6) above. #8) The present analysis was done on MacOS, using knitToHtmlfragment.

#9)To reuse this code for other datasets a) replace 1) and 2) input files above with equivalent files for the dataset b) replace “HuAgeDis_” for module naming to pre-fix of users choice that is meaningful for the dataset. Here Hu=Human, Age=Age, Dis=Disease c) in this pipeline human gene symbols are used

Getting raw data GSE33000

Step1: Load required libraries ans setting working directory

#save working directory location
wd<-getwd()
wd
## [1] "/Users/powermac/Desktop/HuAgeDis11-30-18"
#Install packages by uncommenting two lines below if packages not already installed before
#source("https://bioconductor.org/biocLite.R")
#biocLite(c("GEOquery","Biobase", "limma", "R.utils", "lumi", "DT"))

library(Biobase)
library(GEOquery)
library(lumi)
library(R.utils)
library(DT)
library(limma)
library(sva)
library(pamr)
#At start of pipeline no input files are needed except the .Rproj which sets working directory to current directory and keeps R environment independent of other folders and the .Rmd R code
list.files()
## [1] "HumanAgeDis.Rmd"   "HumanAgeDis.Rproj" "InputHuAgeDis"

Step2: Getting GEO expression data and metadata

#2.1 Get GE0 data as R object

# Saving series and platform gene annotation data from GEO to gset object
gset <- getGEO("GSE33000", GSEMatrix =TRUE, AnnotGPL=TRUE)
if (length(gset) > 1) idx <- grep("GPL4372", attr(gset, "names")) else idx <- 1
gset <- gset[[idx]] 

#2.2 keep only samples of interest

#Interested in all samples so keeping everything. 

#2.3 Getting GEO metadata of interest

#Names of available phenotype data
names(pData(gset))
##  [1] "title"                   "geo_accession"          
##  [3] "status"                  "submission_date"        
##  [5] "last_update_date"        "type"                   
##  [7] "channel_count"           "source_name_ch1"        
##  [9] "organism_ch1"            "characteristics_ch1"    
## [11] "characteristics_ch1.1"   "treatment_protocol_ch1" 
## [13] "growth_protocol_ch1"     "molecule_ch1"           
## [15] "extract_protocol_ch1"    "label_ch1"              
## [17] "label_protocol_ch1"      "taxid_ch1"              
## [19] "source_name_ch2"         "organism_ch2"           
## [21] "characteristics_ch2"     "characteristics_ch2.1"  
## [23] "characteristics_ch2.2"   "characteristics_ch2.3"  
## [25] "treatment_protocol_ch2"  "growth_protocol_ch2"    
## [27] "molecule_ch2"            "extract_protocol_ch2"   
## [29] "label_ch2"               "label_protocol_ch2"     
## [31] "taxid_ch2"               "hyb_protocol"           
## [33] "scan_protocol"           "description"            
## [35] "data_processing"         "platform_id"            
## [37] "contact_name"            "contact_email"          
## [39] "contact_department"      "contact_institute"      
## [41] "contact_address"         "contact_city"           
## [43] "contact_state"           "contact_zip/postal_code"
## [45] "contact_country"         "supplementary_file"     
## [47] "data_row_count"          "age:ch2"                
## [49] "disease status:ch2"      "gender:ch2"             
## [51] "sample type:ch1"         "tissue:ch1"             
## [53] "tissue:ch2"
metadata=data.frame(gset$geo_accession, gset$source_name_ch1, gset$`tissue:ch1`, gset$`gender:ch2`, gset$`age:ch2`,gset$`disease status:ch2`)
head(metadata)
##   gset.geo_accession gset.source_name_ch1       gset..tissue.ch1.
## 1         GSM1423780      HBTRC_PF_Pool_1 prefrontal cortex brain
## 2         GSM1423781      HBTRC_PF_Pool_2 prefrontal cortex brain
## 3         GSM1423782      HBTRC_PF_Pool_3 prefrontal cortex brain
## 4         GSM1423783      HBTRC_PF_Pool_4 prefrontal cortex brain
## 5         GSM1423784      HBTRC_PF_Pool_5 prefrontal cortex brain
## 6         GSM1423785      HBTRC_PF_Pool_6 prefrontal cortex brain
##   gset..gender.ch2. gset..age.ch2. gset..disease.status.ch2.
## 1            female         67 yrs       Alzheimer's disease
## 2              male         88 yrs       Alzheimer's disease
## 3              male         62 yrs       Alzheimer's disease
## 4            female         90 yrs       Alzheimer's disease
## 5            female         90 yrs       Alzheimer's disease
## 6            female         95 yrs       Alzheimer's disease
write.csv(metadata,'GSE33000_metadata.csv')

#2.4 Get GE0 raw expression data.

DT::datatable(exprs(gset)[1:3,])
#The expression data uploaded for this entry is Agilent dual color cy5 cy3 label microarray we will get this data
exprList<-exprs(gset)
write.table(exprList, "GSE33000_expression_data_originalfromGEO.txt", sep="\t", quote=F)

Step3: Quantile normalization with lumi

#3.1 Reading and formatting expression data for normalization

#Now we read in the raw expression values
exprRaw<-read.delim('GSE33000_expression_data_originalfromGEO.txt',header=T, sep='\t')
DT::datatable(exprRaw[1:3,])
#The original GSE33000 is in log base 10 log10 ratio (Cy5/Cy3) so lets convert it back to non-log scale. This we can determine by looking at the 1row-1column of the exprRaw datatable above and the 1st datapoint online reference to this data file both of which are -1.618405e-02 https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSM1423780

#conversiton back to non-log scale
exprRaw1 = 10^exprRaw
DT::datatable(exprRaw1[1:3,])
write.table(exprRaw1, "GSE33000_expression_data_non-log.txt", sep="\t", quote=F)

#3.2 Using lumi for quantile normalization

#Perform qualtile normalization on the raw expression data should be matrix format.
exprLumi<-lumiN(as.matrix(exprRaw1),method="quantile")
## Perform quantile normalization ...
DT::datatable(exprLumi[1:3,])

#3.3 log2+1 transformation

#log2+1 transform the expression data. This step also changes it back to data frame format
exprLog<-log2(exprLumi+1)
DT::datatable(exprLog[1:3,])

#3.4 Check for negative values. Negative values will not work in WGCNA

exprRaw_ifneg<-apply(exprRaw, 1, function(row) any(row <0))
length(which(exprRaw_ifneg)) #what is the length of negative numbers
## [1] 39262
exprRaw1_ifneg<-apply(exprRaw1, 1, function(row) any(row <0))
length(which(exprRaw1_ifneg)) #what is the length of negative numbers
## [1] 0
#0 means no negative values

exprLumi_ifneg<-apply(exprLumi, 1, function(row) any(row <0))
length(which(exprLumi_ifneg)) #what is the length of negative numbers
## [1] 0
#0 means no negative values 

exprLog_ifneg<-apply(exprLog, 1, function(row) any(row <0))
length(which(exprLog_ifneg)) #what is the length of negative numbers
## [1] 0
#0 means no negative values 

#3.5 Check for max and min values. Ususally don’t want max to be <100

exprRaw_max<-which.max(as.matrix(exprRaw))
exprRaw_max
## [1] 26420
exprRaw_min<-which.min(as.matrix(exprRaw))
exprRaw_min
## [1] 151436
exprRaw1_max<-which.max(as.matrix(exprRaw1))
exprRaw1_max
## [1] 26420
exprRaw1_min<-which.min(as.matrix(exprRaw1))
exprRaw1_min
## [1] 151436
exprLumi_max<-which.max(as.matrix(exprLumi))
exprLumi_max
## [1] 110960
exprLumi_min<-which.min(as.matrix(exprLumi))
exprLumi_min
## [1] 9190
exprLog_max<-which.max(as.matrix(exprLog))
exprLog_max
## [1] 110960
exprLog_min<-which.min(as.matrix(exprLog))
exprLog_min
## [1] 9190

#3.6 Descriptive or summary statistics of the data

DT::datatable(summary(exprRaw))
DT::datatable(summary(exprRaw1))
DT::datatable(summary(exprLumi))
DT::datatable(summary(exprLog))
#3.7 Visualization of GSE33000 alone
pdf(file="Visualization_GSE33000_rawdata_transformation.pdf",height=5,width=5)
par(mfrow=c(2,2))

#exprRaw boxplot
par(mai=c(1,0.8,1,0.8))
boxplot(exprRaw, outline=FALSE, las=2, cex=0.25, main="exprRaw", col="yellow")

#exprRaw1 boxplot
par(mai=c(1,0.8,1,0.8))
boxplot(exprRaw1, outline=FALSE, las=2, cex=0.25, main="exprRaw", col="yellow")

#exprLumi boxplot
par(mai=c(1,0.8,1,0.8))
boxplot(exprLumi, outline=FALSE, las=2, cex=0.25, main="exprLumi", col="yellow")

#exprLog boxplot
par(mai=c(1,0.8,1,0.8))
boxplot(exprLog, outline=FALSE, las=2, cex=0.25, main="exprLog", col="yellow")


#exprRaw MDSplot, colored by Study
par(mai=c(1,0.8,1,0.8))
plotMDS(exprRaw, legend= "all", main="exprRaw Study", cex=0.5)#like PCA plot

#exprRaw1 MDSplot, colored by Study
par(mai=c(1,0.8,1,0.8))
plotMDS(exprRaw1, legend= "all", main="exprRaw1 Study", cex=0.5)#like PCA plot

#exprLumi MDSplot, colored by Study
par(mai=c(1,0.8,1,0.8))
plotMDS(exprLumi, legend= "all", main="exprLumi Study", cex=0.5)#like PCA plot

#exprLog MDSplot, colored by Study
par(mai=c(1,0.8,1,0.8))
plotMDS(exprLog, legend= "all", main="exprLog Study", cex=0.5)#like PCA plot

dev.off()
## quartz_off_screen 
##                 2

Step4: Use GPL file to convert probe ID to GeneSymbol in exprLog that will be used for further analysis

#4.1 Getting GPL annotation file

GPLid<-annotation(gset)
GPL_file<-getGEO(GPLid)
## File stored at:
## /var/folders/0g/5tz57sgx7090n5vttfqvs3ym0000gn/T//Rtmpv5hBUt/GPL4372.soft
colnames(GPL_file@dataTable@table)
## [1] "ID"                            "EntrezGeneID"                 
## [3] "ORF"                           "GB_ACC"                       
## [5] "PROBE_DERIVED_FROM_TRANSCRIPT" "SPOT_ID"                      
## [7] "RosettaGeneModelID"
write.table(GPL_file@dataTable@table,"GPL4372.txt", sep='\t', quote=F)
#We pick the ID or reporter ID and ORF or gene symbols from options displayed above. We need this to annotate expression data. Will use ID to combine.
GPL_file1<-GPL_file@dataTable@table[,c("ID","ORF")]
DT::datatable(head(GPL_file1))

#4.2 preparing gene expression data exprLog for merging with annotation file

exprGene=exprLog
#colnames already are sample GSM id
DT::datatable(head(exprGene))
#Add "ID" column that we will use for merging with annotation
exprGene1=cbind(exprGene, rownames(exprGene))
colnames(exprGene1)=c(colnames(exprGene),"ID")
DT::datatable(head(exprGene1))

#4.3 get gene expression data with gene symbols. GEO annotation is static and ensures that annotation of genes does not change over time.

#This is complete gene expression with Gene symbol annotations
#Save and use columns for pipelines as needed
exprGene2=merge(exprGene1, GPL_file1, by="ID")
dim(exprGene2)
## [1] 39280   626
DT::datatable(head(exprGene2))
write.csv(exprGene2, "GSE33000_Annotation_Expr_HuGene.csv")
save.image(file="preSVALM_GSE33000_temp.RData")

Getting raw data GSE43490

Step6: Getting GEO expression data and metadata

#6.1 Get GE0 data as R object

# Saving series and platform gene annotation data from GEO to gset object
gset <- getGEO("GSE43490", GSEMatrix =TRUE, AnnotGPL=TRUE)
if (length(gset) > 1) idx <- grep("GPL6480", attr(gset, "names")) else idx <- 1
gset <- gset[[idx]] 

#6.2 keep only samples of interest

#these groups were selected on the GEO2R to keep only sunstanitia niagra samples
gsms <- "XXXXXXXXXXXXXX1111111100000XXXXXXXXXXXXXX"
sml <- c()
for (i in 1:nchar(gsms)) { sml[i] <- substr(gsms,i,i) }
# eliminate samples marked as "X"
sel <- which(sml != "X")
sml <- sml[sel]
gset <- gset[ ,sel] 

#6.3 Getting GEO metadata of interest

#Names of available phenotype data
names(pData(gset))
##  [1] "title"                   "geo_accession"          
##  [3] "status"                  "submission_date"        
##  [5] "last_update_date"        "type"                   
##  [7] "channel_count"           "source_name_ch1"        
##  [9] "organism_ch1"            "characteristics_ch1"    
## [11] "characteristics_ch1.1"   "characteristics_ch1.2"  
## [13] "characteristics_ch1.3"   "treatment_protocol_ch1" 
## [15] "molecule_ch1"            "extract_protocol_ch1"   
## [17] "label_ch1"               "label_protocol_ch1"     
## [19] "taxid_ch1"               "hyb_protocol"           
## [21] "scan_protocol"           "description"            
## [23] "data_processing"         "platform_id"            
## [25] "contact_name"            "contact_email"          
## [27] "contact_phone"           "contact_laboratory"     
## [29] "contact_department"      "contact_institute"      
## [31] "contact_address"         "contact_city"           
## [33] "contact_state"           "contact_zip/postal_code"
## [35] "contact_country"         "supplementary_file"     
## [37] "data_row_count"          "age:ch1"                
## [39] "disease stage:ch1"       "disease state:ch1"      
## [41] "gender:ch1"
metadata=data.frame(gset$geo_accession, gset$source_name_ch1, gset$`gender:ch1`, gset$`age:ch1`,gset$`disease state:ch1`, gset$`disease stage:ch1`)
head(metadata)
##   gset.geo_accession    gset.source_name_ch1 gset..gender.ch1.
## 1         GSM1294118 brain, substantia_nigra            Female
## 2         GSM1294119 brain, substantia_nigra              Male
## 3         GSM1294120 brain, substantia_nigra              Male
## 4         GSM1294121 brain, substantia_nigra            Female
## 5         GSM1294122 brain, substantia_nigra            Female
## 6         GSM1294123 brain, substantia_nigra              Male
##   gset..age.ch1. gset..disease.state.ch1. gset..disease.stage.ch1.
## 1             85      Parkinson's disease                  Braak 4
## 2             64      Parkinson's disease                  Braak 4
## 3             66      Parkinson's disease                  Braak 4
## 4             80      Parkinson's disease                  Braak 4
## 5             84      Parkinson's disease                  Braak 4
## 6             68      Parkinson's disease                  Braak 5
write.csv(metadata,'GSE43490_metadata.csv')

#6.4 Get GE0 raw expression data.

DT::datatable(exprs(gset)[1:3,])
#The expression data uploaded for this entry is Agilent single color cy3 label we will get this data
exprList<-exprs(gset)
write.table(exprList, "GSE43490_expression_data_originalfromGEO.txt", sep="\t", quote=F)

Step7: Quantile normalization with lumi

#7.1 Reading and formatting expression data normalization

#Now we read in the raw expression values
exprRaw<-read.delim('GSE43490_expression_data_originalfromGEO.txt',header=T, sep='\t')
DT::datatable(exprRaw[1:3,])
#The original GSE43490 is in log base 2 log2 single color Cy3 Lowess method normalized so lets convert it back to non-log scale. This we can determine by looking at the 1row-1column of the exprRaw datatable above and the 1st datapoint online reference to this data file both of which are 5.800456543 https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSM1294104

#conversiton back to non-log scale
exprRaw1 = 2^exprRaw
DT::datatable(exprRaw1[1:3,])
write.table(exprRaw1, "GSE43490_expression_data_non-log.txt", sep="\t", quote=F)

#7.2 Using lumi for quantile normalization

#Perform qualtile normalization on the raw expression data should be matrix format.
exprLumi<-lumiN(as.matrix(exprRaw1),method="quantile")
## Perform quantile normalization ...
DT::datatable(exprLumi[1:3,])

#7.3 log2+1 transformation

#log2+1 transform the expression data. This step also changes it back to data frame format
exprLog<-log2(exprLumi+1)
DT::datatable(exprLog[1:3,])

#7.4 Check for negative values. Negative values will not work in WGCNA

exprRaw_ifneg<-apply(exprRaw, 1, function(row) any(row <0))
length(which(exprRaw_ifneg)) #what is the length of negative numbers
## [1] 0
exprRaw1_ifneg<-apply(exprRaw1, 1, function(row) any(row <0))
length(which(exprRaw1_ifneg)) #what is the length of negative numbers
## [1] 0
#0 means no negative values

exprLumi_ifneg<-apply(exprLumi, 1, function(row) any(row <0))
length(which(exprLumi_ifneg)) #what is the length of negative numbers
## [1] 0
#0 means no negative values 

exprLog_ifneg<-apply(exprLog, 1, function(row) any(row <0))
length(which(exprLog_ifneg)) #what is the length of negative numbers
## [1] 0
#0 means no negative values 

#7.5 Check for max and min values. Ususally don’t want max to be <100

exprRaw_max<-which.max(as.matrix(exprRaw))
exprRaw_max
## [1] 151507
exprRaw_min<-which.min(as.matrix(exprRaw))
exprRaw_min
## [1] 3548
exprRaw1_max<-which.max(as.matrix(exprRaw1))
exprRaw1_max
## [1] 151507
exprRaw1_min<-which.min(as.matrix(exprRaw1))
exprRaw1_min
## [1] 3548
exprLumi_max<-which.max(as.matrix(exprLumi))
exprLumi_max
## [1] 4108
exprLumi_min<-which.min(as.matrix(exprLumi))
exprLumi_min
## [1] 3548
exprLog_max<-which.max(as.matrix(exprLog))
exprLog_max
## [1] 4108
exprLog_min<-which.min(as.matrix(exprLog))
exprLog_min
## [1] 3548
#7.6 Visualization of GSE43490 alone
pdf(file="Visualization_GSE43490_rawdata_transformation.pdf",height=5,width=5)
par(mfrow=c(2,2))

#exprRaw boxplot
par(mai=c(1,0.8,1,0.8))
boxplot(exprRaw, outline=FALSE, las=2, cex=0.25, main="exprRaw", col="yellow")

#exprRaw1 boxplot
par(mai=c(1,0.8,1,0.8))
boxplot(exprRaw1, outline=FALSE, las=2, cex=0.25, main="exprRaw", col="yellow")

#exprLumi boxplot
par(mai=c(1,0.8,1,0.8))
boxplot(exprLumi, outline=FALSE, las=2, cex=0.25, main="exprLumi", col="yellow")

#exprLog boxplot
par(mai=c(1,0.8,1,0.8))
boxplot(exprLog, outline=FALSE, las=2, cex=0.25, main="exprLog", col="yellow")


#exprRaw MDSplot, colored by Study
par(mai=c(1,0.8,1,0.8))
plotMDS(exprRaw, legend= "all", main="exprRaw Study", cex=0.5)#like PCA plot

#exprRaw1 MDSplot, colored by Study
par(mai=c(1,0.8,1,0.8))
plotMDS(exprRaw1, legend= "all", main="exprRaw1 Study", cex=0.5)#like PCA plot

#exprLumi MDSplot, colored by Study
par(mai=c(1,0.8,1,0.8))
plotMDS(exprLumi, legend= "all", main="exprLumi Study", cex=0.5)#like PCA plot

#exprLog MDSplot, colored by Study
par(mai=c(1,0.8,1,0.8))
plotMDS(exprLog, legend= "all", main="exprLog Study", cex=0.5)#like PCA plot

dev.off()
## quartz_off_screen 
##                 2

Step8: Use GPL file to convert probe ID to GeneSymbol in exprLog that will be used for further analysis

#8.1 Getting GPL annotation file

GPLid<-annotation(gset)
GPL_file<-getGEO(GPLid)
## File stored at:
## /var/folders/0g/5tz57sgx7090n5vttfqvs3ym0000gn/T//Rtmpv5hBUt/GPL6480.soft
colnames(GPL_file@dataTable@table)
##  [1] "ID"                   "SPOT_ID"              "CONTROL_TYPE"        
##  [4] "REFSEQ"               "GB_ACC"               "GENE"                
##  [7] "GENE_SYMBOL"          "GENE_NAME"            "UNIGENE_ID"          
## [10] "ENSEMBL_ID"           "TIGR_ID"              "ACCESSION_STRING"    
## [13] "CHROMOSOMAL_LOCATION" "CYTOBAND"             "DESCRIPTION"         
## [16] "GO_ID"                "SEQUENCE"
write.table(GPL_file@dataTable@table,"GPL6480.txt", sep='\t', quote=F)
#We pick the ID or reporter ID and ORF or gene symbols from options displayed above. We need this to annotate expression data. Will use ID to combine.
GPL_file1<-GPL_file@dataTable@table[,c("ID","GENE_SYMBOL")]
DT::datatable(head(GPL_file1))

#8.2 preparing gene expression data exprLog for merging with annotation file

exprGene=exprLog
#colnames already are sample GSM id
DT::datatable(head(exprGene))
#Add "ID" column that we will use for merging with annotation
exprGene1=cbind(exprGene, rownames(exprGene))
colnames(exprGene1)=c(colnames(exprGene),"ID")
DT::datatable(head(exprGene1))

#8.3 get gene expression data with gene symbols. GEO annotation is static and ensures that annotation of genes does not change over time.

#This is complete gene expression with Gene symbol annotations
#Save and use columns for pipelines as needed
exprGene2=merge(exprGene1, GPL_file1, by="ID")
dim(exprGene2)
## [1] 22627    15
DT::datatable(head(exprGene2))
write.csv(exprGene2, "GSE43490_Annotation_Expr_HuGene.csv")
#The files we needed are saved so we now clear workspace and delete files and folders that are not needed 
save.image(file="preSVALM_GSE43490_temp.RData")
rm(list=ls())
gc()
##            used  (Mb) gc trigger   (Mb) limit (Mb)  max used   (Mb)
## Ncells  8224275 439.3   26482191 1414.4         NA  22198464 1185.6
## Vcells 22624544 172.7  320402187 2444.5     102400 400418118 3055.0

Merging GSE33000 and GSE43490

Step9: Load required libraries , setting working directory and Import data

#save working directory location
wd<-getwd()
wd
## [1] "/Users/powermac/Desktop/HuAgeDis11-30-18"
#Install packages by uncommenting two lines below if packages not already installed before
#source("https://bioconductor.org/biocLite.R")
#biocLite(c("tidyr","dplyr"))
library(dplyr) # used for general data wrangling
library(tidyr) # used for tidying tables
GSE33000Expr=read.csv("GSE33000_Annotation_Expr_HuGene.csv", header =T, sep=',', na.strings = c("", "NA"))
GSE33000Expr=GSE33000Expr %>% na.omit()

GSE43490Expr=read.csv("GSE43490_Annotation_Expr_HuGene.csv", header =T, sep=',', na.strings = c("", "NA"))
GSE43490Expr=GSE43490Expr %>% na.omit()
DT::datatable(GSE33000Expr[1:7,1:7])
DT::datatable(GSE43490Expr[1:7,1:7])
dim(GSE33000Expr)
## [1] 21159   627
dim(GSE43490Expr)
## [1] 11776    16

Step10: merge datasets

#Rename the last column name of both datasets to same 'GeneHu'
colnames(GSE33000Expr)[colnames(GSE33000Expr)=='ORF']='GeneHu'
colnames(GSE43490Expr)[colnames(GSE43490Expr)=='GENE_SYMBOL']='GeneHu'
mergeExpr=merge(GSE33000Expr,GSE43490Expr, by='GeneHu')
dim(mergeExpr)
## [1] 11762   642
dim(GSE33000Expr)
## [1] 21159   627
dim(GSE43490Expr)
## [1] 11776    16
#Except one ID column and one gene column, remove other non-sample annotation columns. 
#Here picked GSE33000 ID column and gene column to save. Since in microarray probe IDs are unique genes are not unique
mergeExpr=mergeExpr[-c(2,628,629)]
dim(mergeExpr)
## [1] 11762   639

Step11: Matching GSM or SRR number and replacing Sample_names of choice from merged metadata (same order as merging of expression data above)

Also incuded is a match column step in this code to ensure match

Before importing the metadate file, decide and include a column of sample names of your liking and the Study or batch information column you have.

The GSE33000_metadata.csv rows are followed by GSE43490_metadata.csv to make merged ‘merge_GSE33000_GSE43490_metadata.csv’

merge_metadata=read.csv('./InputHuAgeDis/merge_GSE33000_GSE43490_metadata.csv', header =T, sep=',')
DT::datatable(head(merge_metadata))
#Number of rows of above metadata is same as number of columns of Expr data +2 for GeneHu and probe IDs
dim(mergeExpr)
## [1] 11762   639
dim(merge_metadata)
## [1] 637   7
#Replace of column names in expression with sample names
colnames(mergeExpr) <- merge_metadata$Sample_name[match(colnames(mergeExpr), merge_metadata$GEO_Accession)]
colnames(mergeExpr)[1]<-"GeneHu"
colnames(mergeExpr)[2]<-"IDprobe"
#Lets drop the GEO accession number from the metadata now
merge_metadata=merge_metadata[,-c(1)]
DT::datatable(head(merge_metadata))

Step12: we can export the Expression data and Metadata with Sample_name for SVA LM analysis

write.csv(mergeExpr,"merge_GSE33000_GSE43490_Expr_GeneHu_SampleID.csv")
write.csv(merge_metadata,"merge_GSE33000_GSE43490_metadata_SampleID.csv")
save.image(file="preSVALM_merge_GSE33000_GSE43490_temp.RData")
rm(list=ls())
gc()
##            used  (Mb) gc trigger   (Mb) limit (Mb)  max used   (Mb)
## Ncells  8226011 439.4   26482191 1414.4         NA  22198464 1185.6
## Vcells 22629699 172.7  256321749 1955.6     102400 400418118 3055.0

SVA+LM normalization for study or experiment or batch correction and removal of gender

Step13: Load libraries, set working directory and Import data

#save working directory location
wd<-getwd()
wd
## [1] "/Users/powermac/Desktop/HuAgeDis11-30-18"
#source("https://bioconductor.org/biocLite.R")
#biocLite(c("sva","limma","bladderbatch","pamr"))
library(limma)
library(sva)
library(pamr)
library(ggplot2) # plot
library(ggmap)#plots
library(gplots)#plots
library(RColorBrewer)#color pallet
library(Hmisc)#for corelation plot
library(viridis)#for corelation plot
library(corrplot)#for corelation plot
#Metadata_coded
metadata_coded=read.csv("./InputHuAgeDis/merge_GSE33000_GSE43490_metadata_SampleID_Disease_single_coded.csv", header=T, sep=',')
#Expression data
Expr_HuGene=read.csv("merge_GSE33000_GSE43490_Expr_GeneHu_SampleID.csv", header =T, sep=',')
dim(metadata_coded)
## [1] 637   7
dim(Expr_HuGene)
## [1] 11762   640
#Need to make sure that the Gene IDs are unique to specify rownames with Gene IDs
Expr_HuGene = Expr_HuGene[!duplicated(Expr_HuGene$GeneHu),]
dim(metadata_coded)
## [1] 637   7
dim(Expr_HuGene)
## [1] 7301  640
#We lost some gene IDs are there are multiple probeIDs for genes IDs in microarray
Expr_HuGene_1=Expr_HuGene[,-c(1,2,3)] #get rid of some extra probe ID columns
rownames(Expr_HuGene_1)=Expr_HuGene$GeneHu 
Expr_HuGene_1=Expr_HuGene_1[complete.cases(Expr_HuGene_1), ]
edata=Expr_HuGene_1
metadata_coded_1=metadata_coded[,3:7]
rownames(metadata_coded_1)=metadata_coded$Sample_name
pheno=metadata_coded_1
DT::datatable(pheno)
DT::datatable(edata[1:7,1:7])
dim(metadata_coded)
## [1] 637   7
dim(pheno)
## [1] 637   5
dim(Expr_HuGene)
## [1] 7301  640
dim(edata)
## [1] 7301  637
#Make sure we did not lose any samples in expression data
#Here in the output the 1st two and the last two should be same i.e. the first and last samples are same
colnames(Expr_HuGene[4])
## [1] "F_67_AD_GSM1423780"
colnames(edata[1])
## [1] "F_67_AD_GSM1423780"
colnames(Expr_HuGene[640])
## [1] "F_70_CON_GSM1294130"
colnames(edata[637])
## [1] "F_70_CON_GSM1294130"

Step14: Preparing models for SVA (Surrogate variable adjustment)

#full model matrix, variable of interest and other variables
#Put categorical variables ahead of numeric variables 
#This dataset has two tissue sources.But the tissue here is also correlated with batch i.e. the two tissue is from two batches so this introduces computaational singularity and cannot be modeleed. So one of two such covariants can be used in mod. So here Study or batch is used and Tissue is not included in the model
mod = model.matrix(~Study+Gender+Disease+Age, data=pheno)
#null model matrix all except the variable of interest 
#To include only an intercept use mode 'mod0 = model.matrix(~1,data=pheno)'
#Put categorical variables ahead of numeric variables
mod0 = model.matrix(~Study+Gender,data=pheno)

Step15: Running SVA (Surrogate variable adjustment)

#Estimation of Surrogate variable
n.sv = num.sv(as.matrix(edata),mod,method="be") 
svobj = sva(as.matrix(edata),mod,mod0,n.sv=n.sv, B=20)
## Number of significant surrogate variables is:  40 
## Iteration (out of 20 ):1  2  3  4  5  6  7  8  9  10  11  12  13  14  15  16  17  18  19  20

sva function returns a list of 4 components: 1) sv= matrix with columns corrosponding to estimate of surrogate variables 2) pprob.gam=probability that each gene is associated with one or more latent variables 3) pprob.b=probability that each gene is associated with our variable of interest 4) n.sv=number of surrogate variables

Step16: Clean SVA adjusted expression data with LM regression

#Note here the column which we care about is not used pheno[c(-?)], Age in 3rd column, Disease in 4th column 
pheno_sva=cbind(pheno[-c(3,4)],svobj$sv)
reglm_sva<-lapply(1:nrow(edata), function(x){lm(unlist(edata[x,])~.,data=pheno_sva)})
DT::datatable(pheno_sva)
residuals<-lapply(reglm_sva, function(x)residuals(summary(x)))
residuals<-do.call(rbind, residuals)
edata_adjresiduals<-residuals+matrix(apply(edata, 1, mean), nrow=nrow(residuals), ncol=ncol(residuals))
rownames(edata_adjresiduals)=rownames(edata)
rownames(residuals)=rownames(edata)
DT::datatable(edata_adjresiduals[1:7,1:7])
dim(edata_adjresiduals)
## [1] 7301  637
write.csv(edata_adjresiduals, "edata_adjresiduals.csv")
#Save .RData and clear environment to free up memory
save.image(file="temp.RData")
rm(list=ls())
gc()
##            used  (Mb) gc trigger   (Mb) limit (Mb)  max used   (Mb)
## Ncells  8429130 450.2   26482191 1414.4         NA  26482191 1414.4
## Vcells 22996190 175.5  490474000 3742.1     102400 570842476 4355.2
#This 'reglm_sva' is a big object so we remove it as we have saved the other variables we need 
load(file="temp.RData")
rm(reglm_sva)
gc() 
##            used  (Mb) gc trigger   (Mb) limit (Mb)  max used   (Mb)
## Ncells  8439126 450.7   26482191 1414.4         NA  26482191 1414.4
## Vcells 46401020 354.1  472528580 3605.2     102400 570842476 4355.2

Step17: Visualization of before and after SVA and LM regression

Before SVA and LM adjustment

edata #After SVA and LM adjustment edata_adjresiduals

#Summary Statistics edata
DT::datatable(summary(edata[1:7,1:7]))
write.csv(summary(edata),"summary_stats_edata.csv")
#Summary Statistics edata_adjresiduals
DT::datatable(summary(edata_adjresiduals[1:7,1:7]))
write.csv(summary(edata_adjresiduals),"summary_stats_edata_adjresiduals.csv")
pdf(file="Visualization_GSE33000_GSE43490_batch_before_and_after_SVA-LM.pdf",height=10,width=10)
par(mfrow=c(2,2))

#Before Correlation
# Colorbar along the heatmap represents Study or batch
nconditions <- nlevels(as.factor(pheno$Study))
npal <- colorRampPalette(brewer.pal(nconditions, "Set1"))(nconditions)
condition_colors <- npal[as.integer(pheno$Study)]
heatmap.2(cor(edata), RowSideColors=condition_colors,
          trace='none', cexRow=0.5, main='Sample correlations before SVA LM Study')

#After Correlation
# Colorbar along the heatmap represents Study
nconditions <- nlevels(as.factor(pheno$Study))
npal <- colorRampPalette(brewer.pal(nconditions, "Set1"))(nconditions)
condition_colors <- npal[as.integer(pheno$Study)]
heatmap.2(cor(edata_adjresiduals), RowSideColors=condition_colors,
          trace='none',  cexRow=0.5, main='Sample correlations after SVA LM Study')

#Before boxplot
par(mai=c(1,0.8,1,0.8))
boxplot(edata, outline=FALSE, las=2, cex=0.25, main="before SVA LM Study", col="yellow")

#After boxplot
par(mai=c(1,0.8,1,0.8))
boxplot(edata_adjresiduals, outline=FALSE, las=2, cex=0.25, main="after SVA LM Study", col="yellow")

#Before MDSplot, colored by Study
par(mai=c(1,0.8,1,0.8))
plotMDS(edata, col=condition_colors, legend= "all", main="before SVA LM Study", cex=0.5)#like PCA plot

#After MDSplot, colored by Study
par(mai=c(1,0.8,1,0.8))
plotMDS(edata_adjresiduals, col=condition_colors, legend= "all", main="after SVA LM Study", cex=0.5)#like PCA plot

#Before PCAplot, colored by Study or batch
pca0=prcomp(t(edata), center=T, scale=T)
names(pca0)
## [1] "sdev"     "rotation" "center"   "scale"    "x"
#summary(pca0)
pca0$x[1:3,1:3]
##                          PC1        PC2       PC3
## F_67_AD_GSM1423780 -10.89664  -5.974611 12.119206
## M_88_AD_GSM1423781 -13.14602  -6.850155  2.329893
## M_62_AD_GSM1423782 -12.19850 -20.985231  3.787954
plot(x=pca0$x[,1], y=pca0$x[,2],
     cex=0,
     xlab="PC1", ylab="PC2",
     main="PC plot colored by Study or batch before SVA LM",
     font=2
)
text(x=pca0$x[,1], y=pca0$x[,2],
     labels=pheno$Age,
     col=pheno$Study,
     cex=0.7, font=2)
legend("bottomright",
       legend=c("GSE33000", "GSE43490"),
       text.col=c("red", "blue")
)

#After PCAplot, colored by Study or batch
pca0=prcomp(t(edata_adjresiduals), center=T, scale=T)
names(pca0)
## [1] "sdev"     "rotation" "center"   "scale"    "x"
#summary(pca0)
pca0$x[1:3,1:3]
##                          PC1       PC2        PC3
## F_67_AD_GSM1423780  -3.39660  7.322029  0.7392155
## M_88_AD_GSM1423781 -39.11319 25.531541 -1.4343964
## M_62_AD_GSM1423782 -46.60163  0.569492 -8.6183091
plot(x=pca0$x[,1], y=pca0$x[,2],
     cex=0,
     xlab="PC1", ylab="PC2",
     main="PC plot colored by Study or batch after SVA LM",
     font=2
)
text(x=pca0$x[,1], y=pca0$x[,2],
     labels=pheno$Age,
     col=pheno$Study,
     cex=0.7, font=2)
legend("bottomright",
       legend=c("GSE33000", "GSE43490"),
       text.col=c("red", "blue")
)

dev.off()
## quartz_off_screen 
##                 2

Step18: we can export the Expression data for WGCNA

#Export edata_adjustedresidual SVA and LM normalized data for WGCNA
write.csv(edata_adjresiduals, "AgeDiseaseInterest_edata_adjresiduals_GeneID.csv")

save.image(file="SVALM_temp.RData")

Human WGCNA Disease and Age

Step19: Load libraries, setting working directories and Import data for WGCNA

#save working directory location
wd<-getwd()
wd
## [1] "/Users/powermac/Desktop/HuAgeDis11-30-18"
#Load additional functions required for this pipeline
#Reference: Miller, J.A., Horvath, S., and Geschwind, D.H. (2010). Divergence of Human and mouse brain transcriptome highlights Alzheimer disease pathways. Proceedings of the National Academy of Sciences of the United States of America 107, 12698-12703.

write.geneList <- function(PG, filename, allProbes=0, allGenes=0, probe="g")
{
## These functions write a genelist / probelist to a file of geneNames

## USER inputs
# PG = the probe/gene you want written to a gene list
# allProbes = the list of probe names for the above probes
# allGenes = the list of gene names for the corresponding probes
# filename = the filename (can include folder)
# probe = the default ("g") says PG is a gene and doesn''t need to be converted
#         to a gene.  Otherwise PG is assumed to be a probe and converted

gene = PG
if (probe!="g") {
  gene = probe2Gene(PG,allProbes,allGenes)
}
write(gene,filename,sep="\n")

}

cor.test.l = function(x){
## Performs a Pearson correlation on a vector of genes
 ct = cor.test(x,var)
 return(c(ct$est,ct$p.val))
}
#library(BiocInstaller)
#biocLite("qvalue")
#install.packages(c("impute","dynamicTreeCut","flashClust","Hmisc","WGCNA","stringi","enrichR"))
library(impute)
library(dynamicTreeCut)
library(qvalue)
library(flashClust)
library(Hmisc)
library(WGCNA)
## ==========================================================================
## *
## *  Package WGCNA 1.66 loaded.
## *
## *    Important note: It appears that your system supports multi-threading,
## *    but it is not enabled within WGCNA in R. 
## *    To allow multi-threading within WGCNA with all available cores, use 
## *
## *          allowWGCNAThreads()
## *
## *    within R. Use disableWGCNAThreads() to disable threading if necessary.
## *    Alternatively, set the following environment variable on your system:
## *
## *          ALLOW_WGCNA_THREADS=<number_of_processors>
## *
## *    for example 
## *
## *          ALLOW_WGCNA_THREADS=8
## *
## *    To set the environment variable in linux bash shell, type 
## *
## *           export ALLOW_WGCNA_THREADS=8
## *
## *     before running R. Other operating systems or shells will
## *     have a similar command to achieve the same aim.
## *
## ==========================================================================
library(stringi)
library(stringr)
library(enrichR)#for pathway analysis
options(stringsAsFactors = FALSE)
#Human Metadata
metadata=read.csv("./InputHuAgeDis/merge_GSE33000_GSE43490_metadata_SampleID_Disease_single.csv", header=T, sep=',')
metadata_1=metadata[,3:7]
rownames(metadata_1)=metadata$Sample_name
pheno=metadata_1

#Human Metadata coded
metadata_coded=read.csv("./InputHuAgeDis/merge_GSE33000_GSE43490_metadata_SampleID_Disease_single_coded.csv", header=T, sep=',')
metadata_1_coded=metadata_coded[,3:7]
rownames(metadata_1_coded)=metadata$Sample_name
pheno_coded=metadata_1_coded

#Human Expression data 
Expr_HuGene=read.csv("AgeDiseaseInterest_edata_adjresiduals_GeneID.csv", header =T, sep=',')
Expr_HuGene_1=Expr_HuGene[,-1]
rownames(Expr_HuGene_1)=Expr_HuGene$X
Expr_HuGene_1=Expr_HuGene_1[complete.cases(Expr_HuGene_1), ]
edata=Expr_HuGene_1
#Human Aging data
datExprA1g=edata
metadataA1g=pheno
metadataA1g_coded=pheno_coded
#Remove unused variables
rm(metadata)
rm(metadata_1)
rm(pheno)
rm(metadata_coded)
rm(metadata_1_coded)
rm(pheno_coded)
rm(Expr_HuGene)
rm(Expr_HuGene_1)
rm(edata)
DT::datatable(datExprA1g[1:7,1:7])
dim(datExprA1g)
## [1] 7301  637

Step20: Removal of outliers and missing values using WGCNA package function ‘goodSamplesGenes’

#Before applying 'goodSamplesGenes'
dim(datExprA1g)
## [1] 7301  637
#check and applying 'goodSamplesGenes' on data A1
gsg = goodSamplesGenes(datExprA1g, verbose = 3)
##  Flagging genes and samples with too many missing values...
##   ..step 1
gsg$allOK
## [1] TRUE
if (!gsg$allOK)
{
if (sum(!gsg$goodGenes)>0)
#printFlush(paste("Removing genes:", paste(names(datExprA1g)[!gsg$goodGenes], collapse = ", ")));
if (sum(!gsg$goodSamples)>0)
#printFlush(paste("Removing samples:", paste(rownames(datExprA1g)[!gsg$goodSamples], collapse = ", ")));
# Removal step
datExprA1g = datExprA1g[gsg$goodSamples, gsg$goodGenes]
}
#After applying 'goodSamplesGenes'
dim(datExprA1g)
## [1] 7301  637

Step 21: Optional: This step allows selection of top 7000 most variable genes, not most expressed genes (this minimizes noise and is compuationally efficient)

In the present analysis we have used all 7301 genes

#Transpose data
datExprA1g1=as.data.frame(t(datExprA1g))
names(datExprA1g1)=rownames(datExprA1g)
rownames(datExprA1g1)=names(datExprA1g)
DT::datatable(datExprA1g1[1:7,1:7])
#Calculate variance
var = apply(datExprA1g1, 2, var)
dat = rbind(datExprA1g1,var)
rownames(dat) = c(rownames(datExprA1g1), "variance")
#order columns by variance
dat1 = dat[,order(dat["variance",], decreasing=T)]
#Remove row containing variance values
#Here we select all genes
dat2 = dat1[1:dim(datExprA1g1)[1],1:dim(datExprA1g1)[2]]
#To select only a given number of top genes like top7000 uncomment line below
#dat2 = dat1[1:dim(datExprA1g1)[1],1:7000]

datExprA1g2=as.data.frame(t(dat2))
names(datExprA1g2)=rownames(dat2)
rownames(datExprA1g2)=names(dat2)
#remove unused variables
rm(var)

rm(dat1)
#Comparison of data A1 after variance based selction
dim(datExprA1g)
## [1] 7301  637
DT::datatable(datExprA1g[1:7,1:7])
dim(datExprA1g2)
## [1] 7301  637
DT::datatable(datExprA1g2[1:7,1:7])

here onwards will use datExprA1g2

Step22: Pick soft power and detect modules in data A1

#softPower estimate after 'goodSamplesgoodGenes' and variance based selection
#Pick power for the data A1
powers = c(c(1:10), seq(from = 12, to=40, by=2))
sftA1 = pickSoftThreshold(datExprA1g2, RsquaredCut=0.80, powerVector = powers, networkType="signed", moreNetworkConcepts=TRUE, verbose = 5, blockSize=5000)
##  pickSoftThreshold: calculating connectivity for given powers...
##    ..working on genes 1 through 637 of 637
##    Power SFT.R.sq   slope truncated.R.sq  mean.k. median.k.  max.k.
## 1      1  0.89500 14.3000          0.960 448.0000  454.0000 485.000
## 2      2  0.84300  6.2100          0.939 319.0000  327.0000 372.000
## 3      3  0.73500  3.3600          0.879 229.0000  236.0000 288.000
## 4      4  0.63000  1.9900          0.861 167.0000  172.0000 225.000
## 5      5  0.48500  1.3200          0.835 122.0000  127.0000 177.000
## 6      6  0.36400  0.8990          0.839  90.2000   93.0000 140.000
## 7      7  0.16300  0.5120          0.779  67.2000   68.8000 112.000
## 8      8  0.00733  0.0944          0.724  50.4000   51.3000  91.000
## 9      9  0.02070 -0.1650          0.715  38.0000   38.2000  74.000
## 10    10  0.11500 -0.4210          0.682  28.9000   28.5000  60.500
## 11    12  0.32100 -0.7430          0.783  17.0000   16.0000  40.900
## 12    14  0.44600 -1.0800          0.791  10.2000    9.1900  28.100
## 13    16  0.51500 -1.2900          0.843   6.2300    5.2700  19.500
## 14    18  0.59500 -1.3700          0.884   3.8800    3.1200  13.700
## 15    20  0.65200 -1.4300          0.913   2.4500    1.8700   9.660
## 16    22  0.69600 -1.4900          0.933   1.5700    1.1100   6.880
## 17    24  0.72900 -1.4900          0.954   1.0200    0.6640   4.930
## 18    26  0.79500 -1.4800          0.974   0.6690    0.4140   3.550
## 19    28  0.81600 -1.4700          0.979   0.4440    0.2540   2.580
## 20    30  0.81300 -1.5000          0.969   0.2980    0.1600   1.870
## 21    32  0.81000 -1.5500          0.942   0.2010    0.1030   1.370
## 22    34  0.78700 -1.5500          0.890   0.1370    0.0645   1.010
## 23    36  0.83700 -1.5600          0.961   0.0946    0.0405   0.763
## 24    38  0.84800 -1.6000          0.956   0.0656    0.0256   0.582
## 25    40  0.80800 -1.6600          0.877   0.0459    0.0162   0.447
##     Density Centralization Heterogeneity
## 1  7.04e-01       0.058900        0.0531
## 2  5.01e-01       0.084400        0.1030
## 3  3.61e-01       0.092500        0.1500
## 4  2.62e-01       0.091900        0.1960
## 5  1.92e-01       0.086300        0.2400
## 6  1.42e-01       0.078500        0.2840
## 7  1.06e-01       0.071500        0.3260
## 8  7.92e-02       0.064000        0.3670
## 9  5.98e-02       0.056700        0.4080
## 10 4.54e-02       0.049800        0.4480
## 11 2.67e-02       0.037800        0.5260
## 12 1.60e-02       0.028200        0.6010
## 13 9.80e-03       0.020900        0.6740
## 14 6.09e-03       0.015400        0.7440
## 15 3.85e-03       0.011400        0.8130
## 16 2.47e-03       0.008380        0.8790
## 17 1.60e-03       0.006170        0.9440
## 18 1.05e-03       0.004550        1.0100
## 19 6.98e-04       0.003360        1.0700
## 20 4.68e-04       0.002490        1.1300
## 21 3.17e-04       0.001840        1.1900
## 22 2.16e-04       0.001370        1.2500
## 23 1.49e-04       0.001050        1.3100
## 24 1.03e-04       0.000815        1.3700
## 25 7.21e-05       0.000633        1.4300
par(mfrow = c(1,2));
cex1 = 0.9;
#scale-free topology fit index as function of soft-thresholding power
plot(sftA1$fitIndices[,1], -sign(sftA1$fitIndices[,3])*sftA1$fitIndices[,2],
xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n",
main = paste("Scale independence"));
text(sftA1$fitIndices[,1], -sign(sftA1$fitIndices[,3])*sftA1$fitIndices[,2],
labels=powers,cex=cex1,col="red");
#this line corresponds to using R^2 cut-off of h
abline(h=0.90,col="red")
# Mean connectivity as a function of the soft-thresholding power
plot(sftA1$fitIndices[,1], sftA1$fitIndices[,5],
xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
main = paste("Mean connectivity"))
text(sftA1$fitIndices[,1], sftA1$fitIndices[,5], labels=powers, cex=cex1,col="red")

dev.print(pdf, "A1_auto-power_plot.pdf",height=10,width=18)
## quartz_off_screen 
##                 2
#sftA1$powerEstimate is systems best guess
softPowerA1=sftA1$powerEstimate
softPowerA1
## [1] 1
#set soft power to value 12 for signed networks if above automatic detection detects low power.
softPowerA1=12
#Calculate weighted adjacency
adjacencyA1 = adjacency(t(datExprA1g2),power=softPowerA1,type="signed");
diag(adjacencyA1)=0
#Turn adjacency into topology overlap matrix
TOM=TOMsimilarity(adjacencyA1, TOMType="signed")
## ..connectivity..
## ..matrix multiplication (system BLAS)..
## ..normalization..
## ..done.
dissTOMA1 = 1-TOM
#Hierarchial clustering based on TOM
geneTreeA1 = flashClust(as.dist(dissTOMA1), method="average")
#Display the networks visually:
#pdf("dendrogram.pdf",height=6,width=16)
#par(mar=c(1,1,1,1))
#par(mfrow=c(1,2))
plot(geneTreeA1,xlab="",sub="",main="Gene clustering on TOM-based dissimilarity (A1)",
labels=FALSE,hang=0.04);

#dev.off() 
dev.print(pdf,file="dendrogram.pdf",height=5,width=10)
## quartz_off_screen 
##                 2
#Modules for dataset A1 or modulesA1
mColorh=NULL
mColorhL=NULL
for (ds in 0:3){
tree = cutreeHybrid(dendro = geneTreeA1, pamStage=TRUE,
minClusterSize = (30-3*ds), cutHeight = 0.99,
deepSplit = ds, distM = dissTOMA1)
mColorh=cbind(mColorh,labels2colors(tree$labels));
mColorhL=cbind(mColorhL, paste("HuAgeDis_", str_pad(tree$labels, 2, pad="0"), sep="" ))
}
##  ..done.
##  ..done.
##  ..done.
##  ..done.
#pdf("Module_choices.pdf", height=10,width=25);
plotDendroAndColors(geneTreeA1, mColorh, paste("dpSplt =",0:3), main = "",dendroLabels=FALSE);

#dev.off()
dev.print(pdf,file="Module_choices.pdf", height=5,width=10)
## quartz_off_screen 
##                 2
#Picked deepSplit or dpSplit 2 which gives small number of large modules
modulesA1 = mColorh[,3] #color labels of modules
modulesA1L= mColorhL[,3] #numeric label of modules
#number of genes per module
#Principal component for visualization
PCs1A = moduleEigengenes(t(datExprA1g2), colors=modulesA1)
ME_1A = PCs1A$eigengenes
distPC1A = 1-abs(cor(ME_1A,use="p"))
distPC1A = ifelse(is.na(distPC1A), 0, distPC1A)
pcTree1A = hclust(as.dist(distPC1A),method="a")
MDS_1A = cmdscale(as.dist(distPC1A),2)
colorsA1 = names(table(modulesA1))

par(mfrow=c(1,1), mar=c(0, 3, 1, 1) + 0.1, cex=1)
plot(pcTree1A, xlab="",ylab="",main="",sub="")

dev.print(pdf,file="ModuleEigengeneVisualizationsTree.pdf",height=5,width=10)
## quartz_off_screen 
##                 2
par(mfrow=c(1,1), mar=c(0, 3, 1, 1) + 0.1, cex=1)
plot(MDS_1A, col= colorsA1, main="MDS plot", cex=2, pch=19)

dev.print(pdf,file="ModuleEigengeneVisualizationsMDS.pdf",height=5,width=5)
## quartz_off_screen 
##                 2
PCs1AL = moduleEigengenes(t(datExprA1g2), colors=modulesA1L)
ME_1AL = PCs1AL$eigengenes
distPC1AL = 1-abs(cor(ME_1AL,use="p"))
distPC1AL = ifelse(is.na(distPC1AL), 0, distPC1AL)
pcTree1AL = hclust(as.dist(distPC1AL),method="a")
MDS_1AL = cmdscale(as.dist(distPC1AL),2)
colorsA1L = names(table(modulesA1L))

par(mfrow=c(1,1), mar=c(0, 3, 1, 1) + 0.1, cex=1)
plot(pcTree1AL, xlab="",ylab="",main="",sub="")

dev.print(pdf,file="ModuleEigengeneVisualizationsTreelabels.pdf",height=5,width=10)
## quartz_off_screen 
##                 2
par(mfrow=c(1,1), mar=c(0, 3, 1, 1) + 0.1, cex=1)
#plot(MDS_1AL, col= colorsA1L, main="MDS plot", cex=2, pch=19)
#dev.print(pdf,file="ModuleEigengeneVisualizationsMDSlabels.pdf",height=5,width=5)

Step23A: Module membership (kME) for network comparison and identification of hub genes

#kME to measure of correlations between each gene and each ME
#for data A1
geneModuleMembership1 = signedKME(t(datExprA1g2), ME_1A)
colnames(geneModuleMembership1)=paste("PC",colorsA1,".cor",sep="");
MMPvalue1=corPvalueStudent(as.matrix(geneModuleMembership1),dim(datExprA1g2)[[2]]);
colnames(MMPvalue1)=paste("PC",colorsA1,".pval",sep="");
Gene = rownames(datExprA1g2)
kMEtable1 = cbind(Gene,Gene,modulesA1)
for (i in 1:length(colorsA1))
kMEtable1 = cbind(kMEtable1, geneModuleMembership1[,i], MMPvalue1[,i])
colnames(kMEtable1)=c("PSID","Gene","Module",sort(c(colnames(geneModuleMembership1),
colnames(MMPvalue1))))
write.csv(kMEtable1,"kMEtable1colors.csv")
#Hub genes are genes that show significant correlation with MEs and high within-module connectivity 
#kME values are used to compare data A1 to determine which genes have extremely high kME values in networks
#and are thus genes are hubs in both networks 
#ref: modified from https://support.bioconductor.org/p/94701/
topGenesKME = NULL
for (c in 1:length(colorsA1)){
kMErank1 = rank(-geneModuleMembership1[,c])
#maxKMErank = rank(apply(cbind(kMErank1,kMErank2+.00001),1,max))
#topGenesKME = cbind(topGenesKME,Gene[maxKMErank<=10])
topGenesKME = cbind(topGenesKME,Gene[order(kMErank1, decreasing=FALSE)][1:10])
}
colnames(topGenesKME) = colorsA1
topGenesKME
##       black     blue      brown      green      greenyellow grey      
##  [1,] "TBXAS1"  "PRKD3"   "RAF1"     "LRP10"    "CFH"       "ANXA4"   
##  [2,] "TYROBP"  "AFF1"    "SAFB2"    "MAPKAPK2" "ANXA2P1"   "PAN3"    
##  [3,] "PTPRC"   "NFE2L2"  "MAPKAPK2" "METTL7A"  "KIAA1199"  "C6orf70" 
##  [4,] "ITGB2"   "PTTG1IP" "PHF19"    "CD59"     "ANXA2"     "KIAA1407"
##  [5,] "MYO1F"   "DYNLT1"  "SIPA1"    "SERPINB6" "FRZB"      "RAPGEF6" 
##  [6,] "LST1"    "MAPRE1"  "AUP1"     "INPPL1"   "DCN"       "ZKSCAN1" 
##  [7,] "CYBA"    "TBC1D2B" "KLHL21"   "AFF1"     "FBLN5"     "EEF1A1"  
##  [8,] "SLC7A7"  "NPC2"    "ZNF646"   "TBC1D2B"  "FMOD"      "DNAJC10" 
##  [9,] "RNASET2" "CD302"   "RBM4"     "ZFP36L1"  "SVIL"      "ADD3"    
## [10,] "FCER1G"  "LRP10"   "C1orf144" "TBL1X"    "BMP6"      "DHTKD1"  
##       magenta    pink      purple    red        turquoise yellow    
##  [1,] "DCTN3"    "SYP"     "P4HA2"   "ACTR3B"   "NRXN3"   "HSF1"    
##  [2,] "PARD6A"   "SSBP4"   "CHORDC1" "ACOT7"    "GOT1"    "PHF1"    
##  [3,] "SUMO3"    "ATP6V0C" "STIP1"   "C10orf35" "MDH2"    "IRF2BP1" 
##  [4,] "CUEDC2"   "RUSC1"   "DEDD2"   "SVOP"     "NRN1"    "HECTD3"  
##  [5,] "RUVBL2"   "PARD6A"  "DNAJA1"  "CLTA"     "TOMM20"  "FAM89B"  
##  [6,] "HRAS"     "GSK3A"   "MKNK2"   "LARGE"    "NAP1L5"  "ANKRD13B"
##  [7,] "ASNA1"    "LPHN1"   "SPR"     "NFKBIE"   "ATP6AP2" "ADRM1"   
##  [8,] "COX5A"    "DLGAP4"  "DNAJB1"  "C6orf154" "CMAS"    "ZNF574"  
##  [9,] "MRPL46"   "CFL1"    "BAG3"    "TRIM9"    "GPRASP1" "FAM100A" 
## [10,] "C17orf81" "CBX4"    "HSPH1"   "TTC9B"    "NKIRAS1" "DEDD"
write.csv(topGenesKME,"A1_HubGenestopGenesKMEcolorsRankSorted.csv")
#These genes or probes represent the top 10 hub genes for modules in data A1 networks.
#Hub genes are genes that show significant correlation with MEs and high within-module connectivity 
#kME values are used to compare data A1 to determine which genes have extremely high kME values in networks
#and are thus genes are hubs in both networks 
topGenesKME = NULL
for (c in 1:length(colorsA1)){
kMErank1 = rank(-geneModuleMembership1[,c])
maxKMErank = rank(apply(cbind(kMErank1),1,max))
#maxKMErank = rank(apply(cbind(kMErank1,kMErank2+.00001),1,max))
topGenesKME = cbind(topGenesKME,Gene[maxKMErank<=10])
}
colnames(topGenesKME) = colorsA1
topGenesKME
##       black     blue      brown      green      greenyellow grey      
##  [1,] "TYROBP"  "CD302"   "SIPA1"    "ZFP36L1"  "FMOD"      "EEF1A1"  
##  [2,] "TBXAS1"  "LRP10"   "PHF19"    "CD59"     "KIAA1199"  "ADD3"    
##  [3,] "FCER1G"  "PTTG1IP" "MAPKAPK2" "METTL7A"  "FRZB"      "KIAA1407"
##  [4,] "ITGB2"   "NFE2L2"  "KLHL21"   "LRP10"    "CFH"       "RAPGEF6" 
##  [5,] "PTPRC"   "NPC2"    "SAFB2"    "MAPKAPK2" "ANXA2"     "PAN3"    
##  [6,] "SLC7A7"  "PRKD3"   "RBM4"     "AFF1"     "ANXA2P1"   "ANXA4"   
##  [7,] "LST1"    "AFF1"    "C1orf144" "INPPL1"   "DCN"       "ZKSCAN1" 
##  [8,] "MYO1F"   "MAPRE1"  "ZNF646"   "TBL1X"    "BMP6"      "DHTKD1"  
##  [9,] "RNASET2" "DYNLT1"  "AUP1"     "SERPINB6" "SVIL"      "C6orf70" 
## [10,] "CYBA"    "TBC1D2B" "RAF1"     "TBC1D2B"  "FBLN5"     "DNAJC10" 
##       magenta    pink      purple    red        turquoise yellow    
##  [1,] "PARD6A"   "LPHN1"   "BAG3"    "SVOP"     "NRN1"    "IRF2BP1" 
##  [2,] "SUMO3"    "CBX4"    "DNAJB1"  "ACTR3B"   "NAP1L5"  "FAM100A" 
##  [3,] "COX5A"    "ATP6V0C" "MKNK2"   "C6orf154" "NRXN3"   "PHF1"    
##  [4,] "HRAS"     "DLGAP4"  "CHORDC1" "TRIM9"    "TOMM20"  "HECTD3"  
##  [5,] "MRPL46"   "SYP"     "P4HA2"   "TTC9B"    "GOT1"    "HSF1"    
##  [6,] "RUVBL2"   "CFL1"    "HSPH1"   "NFKBIE"   "GPRASP1" "ANKRD13B"
##  [7,] "DCTN3"    "SSBP4"   "DNAJA1"  "CLTA"     "NKIRAS1" "ZNF574"  
##  [8,] "C17orf81" "PARD6A"  "SPR"     "ACOT7"    "ATP6AP2" "ADRM1"   
##  [9,] "ASNA1"    "GSK3A"   "DEDD2"   "C10orf35" "MDH2"    "FAM89B"  
## [10,] "CUEDC2"   "RUSC1"   "STIP1"   "LARGE"    "CMAS"    "DEDD"
write.csv(topGenesKME,"A1_HubGenestopGenesKMEcolorsRankUnsorted.csv")
#These genes or probes represent the top 10 hub genes for modules in data A1 networks.

Same as above for labeled modules

#kME to measure of correlations between each gene and each ME
#for data A1
geneModuleMembership1L = signedKME(t(datExprA1g2), ME_1AL)
colnames(geneModuleMembership1L)=paste("PC",colorsA1L,".cor",sep="");
MMPvalue1L=corPvalueStudent(as.matrix(geneModuleMembership1L),dim(datExprA1g2)[[2]]);
colnames(MMPvalue1L)=paste("PC",colorsA1L,".pval",sep="");
GeneL = rownames(datExprA1g2)
kMEtable1L = cbind(GeneL,GeneL,modulesA1L)
for (i in 1:length(colorsA1L))
kMEtable1L = cbind(kMEtable1L, geneModuleMembership1L[,i], MMPvalue1L[,i])
colnames(kMEtable1L)=c("PSID","Gene","Module",sort(c(colnames(geneModuleMembership1L),
colnames(MMPvalue1L))))
write.csv(kMEtable1L,"kMEtable1labels.csv")
#Hub genes are genes that show significant correlation with MEs and high within-module connectivity 
#kME values are used to compare data A1 to determine which genes have extremely high kME values in networks
#and are thus genes are hubs in both networks 
#ref: modified from https://support.bioconductor.org/p/94701/
topGenesKMEL = NULL
for (c in 1:length(colorsA1L)){
kMErank1L = rank(-geneModuleMembership1L[,c])
#maxKMErank = rank(apply(cbind(kMErank1,kMErank2+.00001),1,max))
#topGenesKMEL = cbind(topGenesKMEL,GeneL[maxKMErankL<=10])
topGenesKMEL = cbind(topGenesKMEL,GeneL[order(kMErank1L, decreasing=FALSE)][1:10])
}
colnames(topGenesKMEL) = colorsA1L
topGenesKMEL
##       HuAgeDis_00 HuAgeDis_01 HuAgeDis_02 HuAgeDis_03 HuAgeDis_04
##  [1,] "ANXA4"     "NRXN3"     "PRKD3"     "RAF1"      "HSF1"     
##  [2,] "PAN3"      "GOT1"      "AFF1"      "SAFB2"     "PHF1"     
##  [3,] "C6orf70"   "MDH2"      "NFE2L2"    "MAPKAPK2"  "IRF2BP1"  
##  [4,] "KIAA1407"  "NRN1"      "PTTG1IP"   "PHF19"     "HECTD3"   
##  [5,] "RAPGEF6"   "TOMM20"    "DYNLT1"    "SIPA1"     "FAM89B"   
##  [6,] "ZKSCAN1"   "NAP1L5"    "MAPRE1"    "AUP1"      "ANKRD13B" 
##  [7,] "EEF1A1"    "ATP6AP2"   "TBC1D2B"   "KLHL21"    "ADRM1"    
##  [8,] "DNAJC10"   "CMAS"      "NPC2"      "ZNF646"    "ZNF574"   
##  [9,] "ADD3"      "GPRASP1"   "CD302"     "RBM4"      "FAM100A"  
## [10,] "DHTKD1"    "NKIRAS1"   "LRP10"     "C1orf144"  "DEDD"     
##       HuAgeDis_05 HuAgeDis_06 HuAgeDis_07 HuAgeDis_08 HuAgeDis_09
##  [1,] "LRP10"     "ACTR3B"    "TBXAS1"    "SYP"       "DCTN3"    
##  [2,] "MAPKAPK2"  "ACOT7"     "TYROBP"    "SSBP4"     "PARD6A"   
##  [3,] "METTL7A"   "C10orf35"  "PTPRC"     "ATP6V0C"   "SUMO3"    
##  [4,] "CD59"      "SVOP"      "ITGB2"     "RUSC1"     "CUEDC2"   
##  [5,] "SERPINB6"  "CLTA"      "MYO1F"     "PARD6A"    "RUVBL2"   
##  [6,] "INPPL1"    "LARGE"     "LST1"      "GSK3A"     "HRAS"     
##  [7,] "AFF1"      "NFKBIE"    "CYBA"      "LPHN1"     "ASNA1"    
##  [8,] "TBC1D2B"   "C6orf154"  "SLC7A7"    "DLGAP4"    "COX5A"    
##  [9,] "ZFP36L1"   "TRIM9"     "RNASET2"   "CFL1"      "MRPL46"   
## [10,] "TBL1X"     "TTC9B"     "FCER1G"    "CBX4"      "C17orf81" 
##       HuAgeDis_10 HuAgeDis_11
##  [1,] "P4HA2"     "CFH"      
##  [2,] "CHORDC1"   "ANXA2P1"  
##  [3,] "STIP1"     "KIAA1199" 
##  [4,] "DEDD2"     "ANXA2"    
##  [5,] "DNAJA1"    "FRZB"     
##  [6,] "MKNK2"     "DCN"      
##  [7,] "SPR"       "FBLN5"    
##  [8,] "DNAJB1"    "FMOD"     
##  [9,] "BAG3"      "SVIL"     
## [10,] "HSPH1"     "BMP6"
write.csv(topGenesKMEL,"A1_HubGenestopGenesKMElabelsRankSorted.csv")
#These genes or probes represent the top 10 hub genes for modules in data A1 networks.
#Hub genes are genes that show significant correlation with MEs and high within-module connectivity 
#kME values are used to compare data A1 to determine which genes have extremely high kME values in networks
#and are thus genes are hubs in both networks 
topGenesKMEL = NULL
for (c in 1:length(colorsA1L)){
kMErank1L = rank(-geneModuleMembership1L[,c])
maxKMErankL = rank(apply(cbind(kMErank1L),1,max))
#maxKMErank = rank(apply(cbind(kMErank1,kMErank2+.00001),1,max))
topGenesKMEL = cbind(topGenesKMEL,GeneL[maxKMErankL<=10])
}
colnames(topGenesKMEL) = colorsA1L
topGenesKMEL
##       HuAgeDis_00 HuAgeDis_01 HuAgeDis_02 HuAgeDis_03 HuAgeDis_04
##  [1,] "EEF1A1"    "NRN1"      "CD302"     "SIPA1"     "IRF2BP1"  
##  [2,] "ADD3"      "NAP1L5"    "LRP10"     "PHF19"     "FAM100A"  
##  [3,] "KIAA1407"  "NRXN3"     "PTTG1IP"   "MAPKAPK2"  "PHF1"     
##  [4,] "RAPGEF6"   "TOMM20"    "NFE2L2"    "KLHL21"    "HECTD3"   
##  [5,] "PAN3"      "GOT1"      "NPC2"      "SAFB2"     "HSF1"     
##  [6,] "ANXA4"     "GPRASP1"   "PRKD3"     "RBM4"      "ANKRD13B" 
##  [7,] "ZKSCAN1"   "NKIRAS1"   "AFF1"      "C1orf144"  "ZNF574"   
##  [8,] "DHTKD1"    "ATP6AP2"   "MAPRE1"    "ZNF646"    "ADRM1"    
##  [9,] "C6orf70"   "MDH2"      "DYNLT1"    "AUP1"      "FAM89B"   
## [10,] "DNAJC10"   "CMAS"      "TBC1D2B"   "RAF1"      "DEDD"     
##       HuAgeDis_05 HuAgeDis_06 HuAgeDis_07 HuAgeDis_08 HuAgeDis_09
##  [1,] "ZFP36L1"   "SVOP"      "TYROBP"    "LPHN1"     "PARD6A"   
##  [2,] "CD59"      "ACTR3B"    "TBXAS1"    "CBX4"      "SUMO3"    
##  [3,] "METTL7A"   "C6orf154"  "FCER1G"    "ATP6V0C"   "COX5A"    
##  [4,] "LRP10"     "TRIM9"     "ITGB2"     "DLGAP4"    "HRAS"     
##  [5,] "MAPKAPK2"  "TTC9B"     "PTPRC"     "SYP"       "MRPL46"   
##  [6,] "AFF1"      "NFKBIE"    "SLC7A7"    "CFL1"      "RUVBL2"   
##  [7,] "INPPL1"    "CLTA"      "LST1"      "SSBP4"     "DCTN3"    
##  [8,] "TBL1X"     "ACOT7"     "MYO1F"     "PARD6A"    "C17orf81" 
##  [9,] "SERPINB6"  "C10orf35"  "RNASET2"   "GSK3A"     "ASNA1"    
## [10,] "TBC1D2B"   "LARGE"     "CYBA"      "RUSC1"     "CUEDC2"   
##       HuAgeDis_10 HuAgeDis_11
##  [1,] "BAG3"      "FMOD"     
##  [2,] "DNAJB1"    "KIAA1199" 
##  [3,] "MKNK2"     "FRZB"     
##  [4,] "CHORDC1"   "CFH"      
##  [5,] "P4HA2"     "ANXA2"    
##  [6,] "HSPH1"     "ANXA2P1"  
##  [7,] "DNAJA1"    "DCN"      
##  [8,] "SPR"       "BMP6"     
##  [9,] "DEDD2"     "SVIL"     
## [10,] "STIP1"     "FBLN5"
write.csv(topGenesKMEL,"A1_HubGenestopGenesKMElabelsRankUnsorted.csv")
#These genes or probes represent the top 10 hub genes for modules in data A1 networks.

Step23B: Intramodular connectivity (kIM or kWithin) and whole network connectivity (kTotal) for network comparison

#Ref: WGCNA tutorial Steve Horvath Peter Langfelder 2011
#intramodular connectivity is mathematically the sum of module edge weights or "degree" of connectivity between a given node or gene and other genes or nodes within the module. 
#For colors
IntraModularConnectivity1=intramodularConnectivity(adjacencyA1,modulesA1)
#We get the color labels for the genes 
GeneModule1=kMEtable1[,c("PSID","Gene","Module")]
rownames(GeneModule1)=rownames(kMEtable1)
#merge the intramodular connectivity dataframe with module names by rownames that are gene symbols
IntraModularConnectivity2=merge(GeneModule1,IntraModularConnectivity1, by="row.names")
#drop the extra row.name column
IntraModularConnectivity3=IntraModularConnectivity2[,-c(1)]
DT::datatable(head(IntraModularConnectivity3))
#export kWithin and kTotal for all genes 
write.csv(IntraModularConnectivity1,"kWithintable1colors.csv")

Same as above for labeled modules

#Ref: WGCNA tutorial Steve Horvath Peter Langfelder 2011
#intramodular connectivity is mathematically the sum of module edge weights or "degree" of connectivity between a given node or gene and other genes or nodes within the module. 
#For colors
IntraModularConnectivity1L=intramodularConnectivity(adjacencyA1,modulesA1L)
#We get the number labels for the genes 
GeneModule1L=kMEtable1L[,c("PSID","Gene","Module")]
rownames(GeneModule1L)=rownames(kMEtable1L)
#merge the intramodular connectivity dataframe with module names by rownames that are gene symbols
IntraModularConnectivity2L=merge(GeneModule1L,IntraModularConnectivity1L, by="row.names")
#drop the extra row.name colums
IntraModularConnectivity3L=IntraModularConnectivity2L[,-c(1)]
DT::datatable(head(IntraModularConnectivity3L))
#export kWithin and kTotal for all genes 
write.csv(IntraModularConnectivity1L,"kWithintable1labels.csv")

Step24: Generating files for doing GO, KEEG and Reactome annotation of genes in a given module

#Make a folder first called 'Module_GeneskMEHubs' change full path below as per your system
dir.create("./Module_GeneskMEHubs")
folder = "Module_GeneskMEHubs/"
for (c in colorsA1){
fn = paste(folder, c, ".txt",sep="");
write.geneList(Gene[modulesA1==c], fn)
};
write(Gene,paste(folder,"all.txt",sep=""))
#The folder should now have files titled "<color>.txt" that contain the gene names of every gene in that module,as well as a file titled "all.txt" that has every gene in your data set.
#make a summary file of all the genes in modules, similar to kME files but with only genes and module names as above
geneListsModuleSummary=cbind(Gene,modulesA1)
write.csv(geneListsModuleSummary,"geneListsModuleSummarycolors.csv")

Same as above for labeled modules

#Make a folder first called 'Module_GeneskMEHubs' change full path below as per your system
folder = "Module_GeneskMEHubs/"
for (c in colorsA1L){
fn = paste(folder, c, ".txt",sep="");
write.geneList(GeneL[modulesA1L==c], fn)
};
write(GeneL,paste(folder,"allL.txt",sep=""))
#The folder should now have files titled "<color>.txt" that contain the gene names of every gene in that module,as well as a file titled "all.txt" that has every gene in your data set.
#make a summary file of all the genes in modules, similar to kME files but with only genes and module names as above
geneListsModuleSummaryL=cbind(GeneL,modulesA1L)
write.csv(geneListsModuleSummaryL,"geneListsModuleSummarylabels.csv")

Step25: Visualization of trait module relatonships for data A1

In this step we use the coded metadata files

Relating modules to physiological traits for A1

# For data A1
# Define numbers of genes and samples
nGenesA1 = nrow(datExprA1g2)
nSamplesA1 = ncol(datExprA1g2)
# Recalculate MEs with color labels
MEs0A1= moduleEigengenes(t(datExprA1g2),modulesA1)$eigengenes
MEsA1= orderMEs(MEs0A1)
modTraitCorA1 = cor(MEsA1, metadataA1g_coded, use = "p")
modTraitPA1 = corPvalueStudent(modTraitCorA1, nSamplesA1)
textMatrixA1 = paste(signif(modTraitCorA1, 2), "\n(",
signif(modTraitPA1, 1), ")", sep = "")
dim(textMatrixA1) = dim(modTraitCorA1)
par(mar = c(6, 8.5, 3, 3))
# Display the correlation values within a heatmap plot
labeledHeatmap(Matrix = modTraitCorA1, xLabels = names(metadataA1g_coded),
yLabels = names(MEsA1), ySymbols = names(MEsA1), 
colorLabels =FALSE,colors=greenWhiteRed(50),textMatrix=textMatrixA1,
setStdMargins = FALSE, cex.text = 0.5, zlim = c(-1,1),
main = paste("A1_Module-trait relationships colors"))

dev.print(pdf,"A1_relating modules to trait colors.pdf", width=5, height=5)
## quartz_off_screen 
##                 2
#This is for color module trait table
colnames(modTraitPA1) = paste("p.value.", colnames(modTraitCorA1), sep="");
out3<-cbind(Module=rownames(modTraitCorA1), modTraitCorA1, modTraitPA1)
dim(out3)
## [1] 12 11
write.table(out3, "A1_relating modules to trait colors.csv", sep=",",row.names=F)

Same as above for labeled modules

Relating modules to physiological traits for A1

# For data A1
# Define numbers of genes and samples
nGenesA1L = nrow(datExprA1g2)
nSamplesA1L = ncol(datExprA1g2)
# Recalculate MEs with color labels
MEs0A1L= moduleEigengenes(t(datExprA1g2),modulesA1L)$eigengenes
MEsA1L= orderMEs(MEs0A1L)
modTraitCorA1L = cor(MEsA1L, metadataA1g_coded, use = "p")
modTraitPA1L = corPvalueStudent(modTraitCorA1L, nSamplesA1L)
textMatrixA1L = paste(signif(modTraitCorA1L, 2), "\n(",
signif(modTraitPA1L, 1), ")", sep = "")
dim(textMatrixA1L) = dim(modTraitCorA1L)
par(mar = c(6, 8.5, 3, 3))
# Display the correlation values within a heatmap plot
labeledHeatmap(Matrix = modTraitCorA1L, xLabels = names(metadataA1g_coded),
yLabels = names(MEsA1L), ySymbols = names(MEsA1L), 
colorLabels =FALSE,colors=greenWhiteRed(50),textMatrix=textMatrixA1L,
setStdMargins = FALSE, cex.text = 0.5, zlim = c(-1,1),
main = paste("A1_Module-trait relationships labels"))

dev.print(pdf,"A1_relating modules to trait labels.pdf", width=5, height=5)
## quartz_off_screen 
##                 2
#This is for label module trait table
colnames(modTraitPA1L) = paste("p.value.", colnames(modTraitCorA1L), sep="");
out3L<-cbind(Module=rownames(modTraitCorA1L), modTraitCorA1L, modTraitPA1L)
dim(out3L)
## [1] 12 11
write.table(out3L, "A1_relating modules to trait labels.csv", sep=",",row.names=F)
save.image(file="temp.RData")
rm(list=ls())
gc()
##            used  (Mb) gc trigger   (Mb) limit (Mb)  max used   (Mb)
## Ncells  8682105 463.7   26482191 1414.4         NA  26482191 1414.4
## Vcells 23411406 178.7  418296004 3191.4     102400 570842476 4355.2
#To reload uncomment code below
#load(file="temp.RData")

Step26: Additional step for conversion of gene symbols mouse to Human gene symbols

load(file="temp.RData")
library(Rcpp)
library(stringi)
library(biomaRt)
#The useMart() function can now be used to connect to a specified BioMart database, this must be a valid name given by listMarts(). In the next example we choose to query the Ensembl BioMart database.
listMarts()
##                biomart               version
## 1 ENSEMBL_MART_ENSEMBL      Ensembl Genes 94
## 2   ENSEMBL_MART_MOUSE      Mouse strains 94
## 3     ENSEMBL_MART_SNP  Ensembl Variation 94
## 4 ENSEMBL_MART_FUNCGEN Ensembl Regulation 94
ensembl=useMart("ensembl")
listDatasets(ensembl)
##                            dataset
## 1         acalliptera_gene_ensembl
## 2       acarolinensis_gene_ensembl
## 3        acitrinellus_gene_ensembl
## 4        amelanoleuca_gene_ensembl
## 5          amexicanus_gene_ensembl
## 6          anancymaae_gene_ensembl
## 7          aocellaris_gene_ensembl
## 8            apercula_gene_ensembl
## 9      aplatyrhynchos_gene_ensembl
## 10      apolyacanthus_gene_ensembl
## 11       atestudineus_gene_ensembl
## 12            btaurus_gene_ensembl
## 13            caperea_gene_ensembl
## 14              catys_gene_ensembl
## 15         ccapucinus_gene_ensembl
## 16         cchok1gshd_gene_ensembl
## 17            ccrigri_gene_ensembl
## 18           celegans_gene_ensembl
## 19        cfamiliaris_gene_ensembl
## 20            chircus_gene_ensembl
## 21         choffmanni_gene_ensembl
## 22      cintestinalis_gene_ensembl
## 23           cjacchus_gene_ensembl
## 24          clanigera_gene_ensembl
## 25         cpalliatus_gene_ensembl
## 26         cporcellus_gene_ensembl
## 27           csabaeus_gene_ensembl
## 28          csavignyi_gene_ensembl
## 29        csemilaevis_gene_ensembl
## 30          csyrichta_gene_ensembl
## 31        cvariegatus_gene_ensembl
## 32      dmelanogaster_gene_ensembl
## 33      dnovemcinctus_gene_ensembl
## 34             dordii_gene_ensembl
## 35             drerio_gene_ensembl
## 36           eburgeri_gene_ensembl
## 37          ecaballus_gene_ensembl
## 38         eeuropaeus_gene_ensembl
## 39            elucius_gene_ensembl
## 40          etelfairi_gene_ensembl
## 41        falbicollis_gene_ensembl
## 42             fcatus_gene_ensembl
## 43        fdamarensis_gene_ensembl
## 44      fheteroclitus_gene_ensembl
## 45         gaculeatus_gene_ensembl
## 46           gaffinis_gene_ensembl
## 47            ggallus_gene_ensembl
## 48           ggorilla_gene_ensembl
## 49            gmorhua_gene_ensembl
## 50           hburtoni_gene_ensembl
## 51             hcomes_gene_ensembl
## 52            hfemale_gene_ensembl
## 53              hmale_gene_ensembl
## 54           hsapiens_gene_ensembl
## 55         ipunctatus_gene_ensembl
## 56  itridecemlineatus_gene_ensembl
## 57           jjaculus_gene_ensembl
## 58        kmarmoratus_gene_ensembl
## 59          lafricana_gene_ensembl
## 60          lbergylta_gene_ensembl
## 61         lchalumnae_gene_ensembl
## 62          loculatus_gene_ensembl
## 63             malbus_gene_ensembl
## 64           marmatus_gene_ensembl
## 65           mauratus_gene_ensembl
## 66            mcaroli_gene_ensembl
## 67         mdomestica_gene_ensembl
## 68      mfascicularis_gene_ensembl
## 69              mfuro_gene_ensembl
## 70         mgallopavo_gene_ensembl
## 71       mleucophaeus_gene_ensembl
## 72         mlucifugus_gene_ensembl
## 73              mmola_gene_ensembl
## 74           mmulatta_gene_ensembl
## 75           mmurinus_gene_ensembl
## 76          mmusculus_gene_ensembl
## 77        mnemestrina_gene_ensembl
## 78       mochrogaster_gene_ensembl
## 79            mpahari_gene_ensembl
## 80           mspretus_gene_ensembl
## 81             mzebra_gene_ensembl
## 82         nbrichardi_gene_ensembl
## 83           neugenii_gene_ensembl
## 84            ngalili_gene_ensembl
## 85        nleucogenys_gene_ensembl
## 86          oanatinus_gene_ensembl
## 87             oaries_gene_ensembl
## 88         ocuniculus_gene_ensembl
## 89             odegus_gene_ensembl
## 90         ogarnettii_gene_ensembl
## 91               ohni_gene_ensembl
## 92              ohsok_gene_ensembl
## 93           olatipes_gene_ensembl
## 94        omelastigma_gene_ensembl
## 95         oniloticus_gene_ensembl
## 96          oprinceps_gene_ensembl
## 97            pabelii_gene_ensembl
## 98           paltaica_gene_ensembl
## 99            panubis_gene_ensembl
## 100          pbairdii_gene_ensembl
## 101         pcapensis_gene_ensembl
## 102        pcoquereli_gene_ensembl
## 103          pformosa_gene_ensembl
## 104       pkingsleyae_gene_ensembl
## 105        platipinna_gene_ensembl
## 106   pmagnuspinnatus_gene_ensembl
## 107          pmarinus_gene_ensembl
## 108         pmexicana_gene_ensembl
## 109        pnattereri_gene_ensembl
## 110         pnyererei_gene_ensembl
## 111         ppaniscus_gene_ensembl
## 112           ppardus_gene_ensembl
## 113       preticulata_gene_ensembl
## 114         psinensis_gene_ensembl
## 115      ptroglodytes_gene_ensembl
## 116         pvampyrus_gene_ensembl
## 117            rbieti_gene_ensembl
## 118       rnorvegicus_gene_ensembl
## 119        rroxellana_gene_ensembl
## 120          saraneus_gene_ensembl
## 121      sboliviensis_gene_ensembl
## 122       scerevisiae_gene_ensembl
## 123         sdorsalis_gene_ensembl
## 124         sdumerili_gene_ensembl
## 125         sformosus_gene_ensembl
## 126         sharrisii_gene_ensembl
## 127          smaximus_gene_ensembl
## 128         spartitus_gene_ensembl
## 129           sscrofa_gene_ensembl
## 130        tbelangeri_gene_ensembl
## 131          tguttata_gene_ensembl
## 132     tnigroviridis_gene_ensembl
## 133         trubripes_gene_ensembl
## 134        ttruncatus_gene_ensembl
## 135            vpacos_gene_ensembl
## 136       xcouchianus_gene_ensembl
## 137        xmaculatus_gene_ensembl
## 138       xtropicalis_gene_ensembl
##                                                      description
## 1                               Eastern happy genes (fAstCal1.2)
## 2                                 Anole lizard genes (AnoCar2.0)
## 3                                 Midas cichlid genes (Midas_v5)
## 4                                          Panda genes (ailMel1)
## 5                       Cave fish genes (Astyanax_mexicanus-2.0)
## 6                             Ma's night monkey genes (Anan_2.0)
## 7                            Clown anemonefish genes (AmpOce1.0)
## 8                               Orange clownfish genes (Nemo_v1)
## 9                                      Duck genes (BGI_duck_1.0)
## 10                             Spiny chromis genes (ASM210954v1)
## 11                             Climbing perch genes (fAnaTes1.1)
## 12                                            Cow genes (UMD3.1)
## 13                         Brazilian guinea pig genes (CavAp1.0)
## 14                               Sooty mangabey genes (Caty_1.0)
## 15                           Capuchin genes (Cebus_imitator-1.0)
## 16                  Chinese hamster CHOK1GS genes (CHOK1GS_HDv1)
## 17                     Chinese hamster CriGri genes (CriGri_1.0)
## 18                       Caenorhabditis elegans genes (WBcel235)
## 19                                         Dog genes (CanFam3.1)
## 20                                             Goat genes (ARS1)
## 21                                         Sloth genes (choHof1)
## 22                                     C.intestinalis genes (KH)
## 23                                  Marmoset genes (ASM275486v1)
## 24                      Long-tailed chinchilla genes (ChiLan1.0)
## 25                            Angola colobus genes (Cang.pa_1.0)
## 26                                  Guinea Pig genes (Cavpor3.0)
## 27                                  Vervet-AGM genes (ChlSab1.1)
## 28                                   C.savignyi genes (CSAV 2.0)
## 29                                  Tongue sole genes (Cse_v1.0)
## 30                        Tarsier genes (Tarsius_syrichta-2.0.1)
## 31                    Sheepshead minnow genes (C_variegatus-1.0)
## 32                                        Fruitfly genes (BDGP6)
## 33                                   Armadillo genes (Dasnov3.0)
## 34                                 Kangaroo rat genes (Dord_2.0)
## 35                                      Zebrafish genes (GRCz11)
## 36                                  Hagfish genes (Eburgeri_3.2)
## 37                                       Horse genes (Equ Cab 2)
## 38                                      Hedgehog genes (eriEur1)
## 39                                 Northern pike genes (Eluc_V3)
## 40                         Lesser hedgehog tenrec genes (TENREC)
## 41                                 Flycatcher genes (FicAlb_1.4)
## 42                                   Cat genes (Felis_catus_9.0)
## 43                              Damara mole rat genes (DMR_v1.0)
## 44                 Mummichog genes (Fundulus_heteroclitus-3.0.2)
## 45                                  Stickleback genes (BROAD S1)
## 46                      Western mosquitofish genes (ASM309773v1)
## 47                             Chicken genes (Gallus_gallus-5.0)
## 48                                       Gorilla genes (gorGor4)
## 49                                           Cod genes (gadMor1)
## 50                       Burton's mouthbrooder genes (AstBur1.0)
## 51                    Tiger tail seahorse genes (H_comes_QL1_v1)
## 52               Naked mole-rat female genes (HetGla_female_1.0)
## 53                        Naked mole-rat male genes (HetGla_1.0)
## 54                                      Human genes (GRCh38.p12)
## 55                            Channel catfish genes (IpCoco_1.2)
## 56                                    Squirrel genes (SpeTri2.0)
## 57                      Lesser Egyptian jerboa genes (JacJac1.0)
## 58                          Mangrove rivulus genes (ASM164957v1)
## 59                                    Elephant genes (Loxafr3.0)
## 60                              Ballan wrasse genes (BallGen_V1)
## 61                                    Coelacanth genes (LatCha1)
## 62                                   Spotted gar genes (LepOcu1)
## 63                                 Swamp eel genes (M_albus_1.0)
## 64                                Zig-zag eel genes (fMasArm1.1)
## 65                              Golden Hamster genes (MesAur1.0)
## 66                          Ryukyu mouse genes (CAROLI_EIJ_v1.1)
## 67                                       Opossum genes (monDom5)
## 68           Crab-eating macaque genes (Macaca_fascicularis_5.0)
## 69                                   Ferret genes (MusPutFur1.0)
## 70                                    Turkey genes (Turkey_2.01)
## 71                                     Drill genes (Mleu.le_1.0)
## 72                                    Microbat genes (Myoluc2.0)
## 73                             Ocean sunfish genes (ASM169857v1)
## 74                                    Macaque genes (Mmul_8.0.1)
## 75                                  Mouse Lemur genes (Mmur_3.0)
## 76                                       Mouse genes (GRCm38.p6)
## 77                           Pig-tailed macaque genes (Mnem_1.0)
## 78                                Prairie vole genes (MicOch1.0)
## 79                           Shrew mouse genes (PAHARI_EIJ_v1.1)
## 80                           Algerian mouse genes (SPRET_EiJ_v1)
## 81                             Zebra mbuna genes (M_zebra_UMD2a)
## 82                            Lyretail cichlid genes (NeoBri1.0)
## 83                                      Wallaby genes (Meug_1.0)
## 84  Upper Galilee mountains blind mole rat genes (S.galili_v1.0)
## 85                                       Gibbon genes (Nleu_3.0)
## 86                                        Platypus genes (OANA5)
## 87                                        Sheep genes (Oar_v3.1)
## 88                                      Rabbit genes (OryCun2.0)
## 89                                        Degu genes (OctDeg1.0)
## 90                                      Bushbaby genes (OtoGar3)
## 91                       Japanese Medaka HNI genes (ASM223471v1)
## 92                      Japanese Medaka HSOK genes (ASM223469v1)
## 93                      Japanese Medaka HdrR genes (ASM223467v1)
## 94                            Indian medaka genes (Om_v0.7.RACA)
## 95                                     Tilapia genes (Orenil1.0)
## 96                                    Pika genes (OchPri2.0-Ens)
## 97                                       Orangutan genes (PPYG2)
## 98                                       Tiger genes (PanTig1.0)
## 99                                 Olive baboon genes (Panu_3.0)
## 100                Northern American deer mouse genes (Pman_1.0)
## 101                                        Hyrax genes (proCap1)
## 102                           Coquerel's sifaka genes (Pcoq_1.0)
## 103                  Amazon molly genes (Poecilia_formosa-5.1.2)
## 104                  Paramormyrops kingsleyae genes (PKINGS_0.1)
## 105                        Sailfin molly genes (P_latipinna-1.0)
## 106                          Big-finned mudskipper genes (PM.fa)
## 107                                 Lamprey genes (Pmarinus_7.0)
## 108                        Shortfin molly genes (P_mexicana-1.0)
## 109      Red-bellied piranha genes (Pygocentrus_nattereri-1.0.2)
## 110                        Pundamilia nyererei genes (PunNye1.0)
## 111                                     Bonobo genes (panpan1.1)
## 112                                    Leopard genes (PanPar1.0)
## 113                            Guppy genes (Guppy_female_1.0_MT)
## 114                  Chinese softshell turtle genes (PelSin_1.0)
## 115                               Chimpanzee genes (Pan_tro_3.0)
## 116                                      Megabat genes (pteVam1)
## 117                  Black snub-nosed monkey genes (ASM169854v1)
## 118                                         Rat genes (Rnor_6.0)
## 119                     Golden snub-nosed monkey genes (Rrox_v1)
## 120                                        Shrew genes (sorAra1)
## 121                   Bolivian squirrel monkey genes (SaiBol1.0)
## 122                     Saccharomyces cerevisiae genes (R64-1-1)
## 123                          Yellowtail amberjack genes (Sedor1)
## 124                            Greater amberjack genes (Sdu_1.0)
## 125                         Asian bonytongue genes (ASM162426v1)
## 126                       Tasmanian devil genes (Devil_ref v7.0)
## 127                                   Turbot genes (ASM318616v1)
## 128          Bicolor damselfish genes (Stegastes_partitus-1.0.2)
## 129                                      Pig genes (Sscrofa11.1)
## 130                                   Tree Shrew genes (tupBel1)
## 131                              Zebra Finch genes (taeGut3.2.4)
## 132                              Tetraodon genes (TETRAODON 8.0)
## 133                                           Fugu genes (FUGU5)
## 134                                      Dolphin genes (turTru1)
## 135                                       Alpaca genes (vicPac1)
## 136     Monterrey platyfish genes (Xiphophorus_couchianus-4.0.1)
## 137                       Platyfish genes (X_maculatus-5.0-male)
## 138                                      Xenopus genes (JGI 4.2)
##                          version
## 1                     fAstCal1.2
## 2                      AnoCar2.0
## 3                       Midas_v5
## 4                        ailMel1
## 5         Astyanax_mexicanus-2.0
## 6                       Anan_2.0
## 7                      AmpOce1.0
## 8                        Nemo_v1
## 9                   BGI_duck_1.0
## 10                   ASM210954v1
## 11                    fAnaTes1.1
## 12                        UMD3.1
## 13                      CavAp1.0
## 14                      Caty_1.0
## 15            Cebus_imitator-1.0
## 16                  CHOK1GS_HDv1
## 17                    CriGri_1.0
## 18                      WBcel235
## 19                     CanFam3.1
## 20                          ARS1
## 21                       choHof1
## 22                            KH
## 23                   ASM275486v1
## 24                     ChiLan1.0
## 25                   Cang.pa_1.0
## 26                     Cavpor3.0
## 27                     ChlSab1.1
## 28                      CSAV 2.0
## 29                      Cse_v1.0
## 30        Tarsius_syrichta-2.0.1
## 31              C_variegatus-1.0
## 32                         BDGP6
## 33                     Dasnov3.0
## 34                      Dord_2.0
## 35                        GRCz11
## 36                  Eburgeri_3.2
## 37                     Equ Cab 2
## 38                       eriEur1
## 39                       Eluc_V3
## 40                        TENREC
## 41                    FicAlb_1.4
## 42               Felis_catus_9.0
## 43                      DMR_v1.0
## 44   Fundulus_heteroclitus-3.0.2
## 45                      BROAD S1
## 46                   ASM309773v1
## 47             Gallus_gallus-5.0
## 48                       gorGor4
## 49                       gadMor1
## 50                     AstBur1.0
## 51                H_comes_QL1_v1
## 52             HetGla_female_1.0
## 53                    HetGla_1.0
## 54                    GRCh38.p12
## 55                    IpCoco_1.2
## 56                     SpeTri2.0
## 57                     JacJac1.0
## 58                   ASM164957v1
## 59                     Loxafr3.0
## 60                    BallGen_V1
## 61                       LatCha1
## 62                       LepOcu1
## 63                   M_albus_1.0
## 64                    fMasArm1.1
## 65                     MesAur1.0
## 66               CAROLI_EIJ_v1.1
## 67                       monDom5
## 68       Macaca_fascicularis_5.0
## 69                  MusPutFur1.0
## 70                   Turkey_2.01
## 71                   Mleu.le_1.0
## 72                     Myoluc2.0
## 73                   ASM169857v1
## 74                    Mmul_8.0.1
## 75                      Mmur_3.0
## 76                     GRCm38.p6
## 77                      Mnem_1.0
## 78                     MicOch1.0
## 79               PAHARI_EIJ_v1.1
## 80                  SPRET_EiJ_v1
## 81                 M_zebra_UMD2a
## 82                     NeoBri1.0
## 83                      Meug_1.0
## 84                 S.galili_v1.0
## 85                      Nleu_3.0
## 86                         OANA5
## 87                      Oar_v3.1
## 88                     OryCun2.0
## 89                     OctDeg1.0
## 90                       OtoGar3
## 91                   ASM223471v1
## 92                   ASM223469v1
## 93                   ASM223467v1
## 94                  Om_v0.7.RACA
## 95                     Orenil1.0
## 96                 OchPri2.0-Ens
## 97                         PPYG2
## 98                     PanTig1.0
## 99                      Panu_3.0
## 100                     Pman_1.0
## 101                      proCap1
## 102                     Pcoq_1.0
## 103       Poecilia_formosa-5.1.2
## 104                   PKINGS_0.1
## 105              P_latipinna-1.0
## 106                        PM.fa
## 107                 Pmarinus_7.0
## 108               P_mexicana-1.0
## 109  Pygocentrus_nattereri-1.0.2
## 110                    PunNye1.0
## 111                    panpan1.1
## 112                    PanPar1.0
## 113          Guppy_female_1.0_MT
## 114                   PelSin_1.0
## 115                  Pan_tro_3.0
## 116                      pteVam1
## 117                  ASM169854v1
## 118                     Rnor_6.0
## 119                      Rrox_v1
## 120                      sorAra1
## 121                    SaiBol1.0
## 122                      R64-1-1
## 123                       Sedor1
## 124                      Sdu_1.0
## 125                  ASM162426v1
## 126               Devil_ref v7.0
## 127                  ASM318616v1
## 128     Stegastes_partitus-1.0.2
## 129                  Sscrofa11.1
## 130                      tupBel1
## 131                  taeGut3.2.4
## 132                TETRAODON 8.0
## 133                        FUGU5
## 134                      turTru1
## 135                      vicPac1
## 136 Xiphophorus_couchianus-4.0.1
## 137         X_maculatus-5.0-male
## 138                      JGI 4.2
CellType=read.csv('./InputHuAgeDis/CellType_GSE52564_FCover20_Signature.csv', header=T, sep=',')
CellType=CellType[!duplicated(CellType$Gene_mouse),]
rownames(CellType)=CellType$Gene_mouse
mouse = useMart('ENSEMBL_MART_ENSEMBL', dataset = 'mmusculus_gene_ensembl')
Human = useMart('ENSEMBL_MART_ENSEMBL', dataset = 'hsapiens_gene_ensembl')
myMap = getLDS(attributes = "mgi_symbol", filters = 'mgi_symbol',
    values = CellType$Gene_mouse, mart = mouse, 
    attributesL = c("hgnc_symbol"), martL = Human)
#Some genes will be duplicates (HLA), so let's make them unique.
myMap=myMap[!duplicated(myMap$HGNC.symbol), ]
#unique(myMap$HGNC.symbol)
myMap_new<-myMap
names(myMap_new) <- c("Gene_mouse", "HuGene")
DT::datatable(head(myMap_new))
CellType_Hu=merge(CellType,myMap_new,by='Gene_mouse')
colnames(CellType_Hu)[colnames(CellType_Hu)=="Gene_mouse"] <- "MsGene"
CellType_Hu=CellType_Hu[c("HuGene","CellType_Vs_Others","MsGene")]
CellType_Hu=CellType_Hu[!duplicated(CellType_Hu$HuGene), ]
rownames(CellType_Hu)=CellType_Hu$HuGene
CellType_Hu=CellType_Hu[,-1]
write.csv(CellType_Hu,"CellType_GSE52564_FCover20_Signature_Hu.csv")
celltype=read.csv("CellType_GSE52564_FCover20_Signature_Hu.csv",sep=',')
cellTypeGeneCount=as.data.frame(table(celltype$CellType_Vs_Others))
cellTypeGeneCount
##                                Var1 Freq
## 1    AstroByothersCuffdiff2FCover20  452
## 2  EndotheByothersCuffdiff2FCover20  621
## 3 MicrogliByothersCuffdiff2FCover20 1149
## 4 MyeOligoByothersCuffdiff2FCover20  263
## 5   NeuronByothersCuffdiff2FCover20  709
## 6 NewOligoByothersCuffdiff2FCover20   82
## 7 PreOligoByothersCuffdiff2FCover20  116
write.csv(cellTypeGeneCount,"CellType_GSE52564_FCover20_Signature_Hu_GeneCount.csv")

Step27: Enrichment test for comparison with Cuffdiff2 defined cell type signature genes from microglia, neuron, astrocyte, (precursor, new, mature)oligodendrocyte, endothelial cells

#This is the actual calulation of hypergeometric enrichment
enrichmentsCellType = userListEnrichment(rownames(datExprA1g2), modulesA1,"CellType_GSE52564_FCover20_Signature_Hu.csv", "", "enrichmentsCellType_Sigificantcolors.csv")
## 77 comparisons were successfully performed.

Saving results

#List of genes that overlap between modules and cell type
enrichmentsCellType_OvGenes=enrichmentsCellType[[2]]
dir.create("./enrichmentsCellType_OvGenes")
for (i in 1:length(enrichmentsCellType_OvGenes)) {
  write.csv(enrichmentsCellType_OvGenes[i], file=paste0("enrichmentsCellType_OvGenes/", names(enrichmentsCellType_OvGenes)[i], ".txt"))
}

#List of genes that overlap between modules and cell type
enrichmentsCellType_NumOvGenes_pvalue=enrichmentsCellType[[1]]
write.csv(enrichmentsCellType_NumOvGenes_pvalue,"enrichments_NumOvGenes_pvalue_colors.csv")
geneListsModuleSummary=read.csv('geneListsModuleSummarycolors.csv', sep=',')
enrichmentsSummary=read.csv('enrichments_NumOvGenes_pvalue_colors.csv', sep=',')
geneListsModuleSummaryMod=table(geneListsModuleSummary$modulesA1)
geneListsModuleSummaryMod=as.data.frame(geneListsModuleSummaryMod)
write.csv(geneListsModuleSummaryMod, "geneListsModuleSummarycolors_GeneCount.csv")

colnames(geneListsModuleSummaryMod)=c("InputCategories", "Freq")
geneListsModuleSummaryMod
##    InputCategories Freq
## 1            black  113
## 2             blue  470
## 3            brown  413
## 4            green  239
## 5      greenyellow   49
## 6             grey 3877
## 7          magenta   69
## 8             pink   82
## 9           purple   58
## 10             red  175
## 11       turquoise 1501
## 12          yellow  255
enrichmentsSummarymergeMod=merge(enrichmentsSummary,geneListsModuleSummaryMod, by="InputCategories")
write.csv(enrichmentsSummarymergeMod,"enrichments_SummarymergeMod_colors.csv")

Same as above for labeled modules

enrichmentsCellTypeL = userListEnrichment(rownames(datExprA1g2), modulesA1L,"CellType_GSE52564_FCover20_Signature_Hu.csv", "", "enrichmentsCellType_Sigificantlabels.csv")
## 84 comparisons were successfully performed.
enrichmentsCellType_OvGenesL=enrichmentsCellTypeL[[2]]
for (i in 1:length(enrichmentsCellType_OvGenesL)) {
  write.csv(enrichmentsCellType_OvGenesL[i], file=paste0("enrichmentsCellType_OvGenes/", names(enrichmentsCellType_OvGenesL)[i], ".txt"))
}

enrichmentsCellType_NumOvGenes_pvalueL=enrichmentsCellTypeL[[1]]
write.csv(enrichmentsCellType_NumOvGenes_pvalueL,"enrichments_NumOvGenes_pvalue_labels.csv")
geneListsModuleSummaryL=read.csv('geneListsModuleSummarylabels.csv', sep=',')
enrichmentsSummaryL=read.csv('enrichments_NumOvGenes_pvalue_labels.csv', sep=',')
geneListsModuleSummaryModL=table(geneListsModuleSummaryL$modulesA1L)
geneListsModuleSummaryModL=as.data.frame(geneListsModuleSummaryModL)
write.csv(geneListsModuleSummaryModL, "geneListsModuleSummarylabels_GeneCount.csv")

colnames(geneListsModuleSummaryModL)=c("InputCategories", "Freq")
enrichmentsSummarymergeModL=merge(enrichmentsSummaryL,geneListsModuleSummaryModL, by="InputCategories")
write.csv(enrichmentsSummarymergeModL,"enrichments_SummarymergeMod_labels.csv")

Step 28: Cross-pipeline preservation and hypergeometric test: Qualitatively and quantitatively measure cross-species, cross-pipeline and/or any cross-dataset network preservation at the module level

Will export the files for preservation analysis and hypergeometric test

#Export expression file
write.csv(datExprA1g2,"ForPres&Hyper_HuAgeDis_datExprA1g2.csv")
#Export the module and gene names for use Preservation and hypergeometric test 
write.csv(kMEtable1[,c("PSID","Gene","Module")],"ForPres&Hyper_HuAgeDis_GeneModule.csv")
write.csv(kMEtable1L[,c("PSID","Gene","Module")],"ForPres&Hyper_HuAgeDis_GeneModuleL.csv")

below select black module which has microglia genes

Step29: Annotation of module enriched with microglia signature genes

Select enrichR databases

dbs <- listEnrichrDbs()
dbs
##                                           libraryName numTerms
## 1                                 Genome_Browser_PWMs      615
## 2                            TRANSFAC_and_JASPAR_PWMs      326
## 3                           Transcription_Factor_PPIs      290
## 4                                           ChEA_2013      353
## 5                    Drug_Perturbations_from_GEO_2014      701
## 6                             ENCODE_TF_ChIP-seq_2014      498
## 7                                       BioCarta_2013      249
## 8                                       Reactome_2013       78
## 9                                   WikiPathways_2013      199
## 10                Disease_Signatures_from_GEO_up_2014      142
## 11                                          KEGG_2013      200
## 12                         TF-LOF_Expression_from_GEO      269
## 13                                TargetScan_microRNA      222
## 14                                   PPI_Hub_Proteins      385
## 15                         GO_Molecular_Function_2015     1136
## 16                                          GeneSigDB     2139
## 17                                Chromosome_Location      386
## 18                                   Human_Gene_Atlas       84
## 19                                   Mouse_Gene_Atlas       96
## 20                         GO_Cellular_Component_2015      641
## 21                         GO_Biological_Process_2015     5192
## 22                           Human_Phenotype_Ontology     1779
## 23                    Epigenomics_Roadmap_HM_ChIP-seq      383
## 24                                           KEA_2013      474
## 25                  NURSA_Human_Endogenous_Complexome     1796
## 26                                              CORUM     1658
## 27                            SILAC_Phosphoproteomics       84
## 28                    MGI_Mammalian_Phenotype_Level_3       71
## 29                    MGI_Mammalian_Phenotype_Level_4      476
## 30                                        Old_CMAP_up     6100
## 31                                      Old_CMAP_down     6100
## 32                                       OMIM_Disease       90
## 33                                      OMIM_Expanded      187
## 34                                          VirusMINT       85
## 35                               MSigDB_Computational      858
## 36                        MSigDB_Oncogenic_Signatures      189
## 37              Disease_Signatures_from_GEO_down_2014      142
## 38                    Virus_Perturbations_from_GEO_up      323
## 39                  Virus_Perturbations_from_GEO_down      323
## 40                      Cancer_Cell_Line_Encyclopedia      967
## 41                           NCI-60_Cancer_Cell_Lines       93
## 42        Tissue_Protein_Expression_from_ProteomicsDB      207
## 43  Tissue_Protein_Expression_from_Human_Proteome_Map       30
## 44                                   HMDB_Metabolites     3906
## 45                              Pfam_InterPro_Domains      311
## 46                         GO_Biological_Process_2013      941
## 47                         GO_Cellular_Component_2013      205
## 48                         GO_Molecular_Function_2013      402
## 49                               Allen_Brain_Atlas_up     2192
## 50                            ENCODE_TF_ChIP-seq_2015      816
## 51                  ENCODE_Histone_Modifications_2015      412
## 52                  Phosphatase_Substrates_from_DEPOD       59
## 53                             Allen_Brain_Atlas_down     2192
## 54                  ENCODE_Histone_Modifications_2013      109
## 55                          Achilles_fitness_increase      216
## 56                          Achilles_fitness_decrease      216
## 57                       MGI_Mammalian_Phenotype_2013      476
## 58                                      BioCarta_2015      239
## 59                                      HumanCyc_2015      125
## 60                                          KEGG_2015      179
## 61                                    NCI-Nature_2015      209
## 62                                       Panther_2015      104
## 63                                  WikiPathways_2015      404
## 64                                      Reactome_2015     1389
## 65                                             ESCAPE      315
## 66                                         HomoloGene       12
## 67                Disease_Perturbations_from_GEO_down      839
## 68                  Disease_Perturbations_from_GEO_up      839
## 69                   Drug_Perturbations_from_GEO_down      906
## 70                   Genes_Associated_with_NIH_Grants    32876
## 71                     Drug_Perturbations_from_GEO_up      906
## 72                                           KEA_2015      428
## 73              Single_Gene_Perturbations_from_GEO_up     2460
## 74            Single_Gene_Perturbations_from_GEO_down     2460
## 75                                          ChEA_2015      395
## 76                                              dbGaP      345
## 77                           LINCS_L1000_Chem_Pert_up    33132
## 78                         LINCS_L1000_Chem_Pert_down    33132
## 79   GTEx_Tissue_Sample_Gene_Expression_Profiles_down     2918
## 80     GTEx_Tissue_Sample_Gene_Expression_Profiles_up     2918
## 81                 Ligand_Perturbations_from_GEO_down      261
## 82                  Aging_Perturbations_from_GEO_down      286
## 83                    Aging_Perturbations_from_GEO_up      286
## 84                   Ligand_Perturbations_from_GEO_up      261
## 85                   MCF7_Perturbations_from_GEO_down      401
## 86                     MCF7_Perturbations_from_GEO_up      401
## 87                Microbe_Perturbations_from_GEO_down      312
## 88                  Microbe_Perturbations_from_GEO_up      312
## 89              LINCS_L1000_Ligand_Perturbations_down       96
## 90                LINCS_L1000_Ligand_Perturbations_up       96
## 91              LINCS_L1000_Kinase_Perturbations_down     3644
## 92                LINCS_L1000_Kinase_Perturbations_up     3644
## 93                                      Reactome_2016     1530
## 94                                          KEGG_2016      293
## 95                                  WikiPathways_2016      437
## 96          ENCODE_and_ChEA_Consensus_TFs_from_ChIP-X      104
## 97                 Kinase_Perturbations_from_GEO_down      285
## 98                   Kinase_Perturbations_from_GEO_up      285
## 99                                      BioCarta_2016      237
## 100                                     HumanCyc_2016      152
## 101                                   NCI-Nature_2016      209
## 102                                      Panther_2016      112
## 103                                        DrugMatrix     7876
## 104                                         ChEA_2016      645
## 105                                             huMAP      995
## 106                                    Jensen_TISSUES     1842
## 107 RNA-Seq_Disease_Gene_and_Drug_Signatures_from_GEO     1302
## 108                      MGI_Mammalian_Phenotype_2017     5231
## 109                               Jensen_COMPARTMENTS     2283
## 110                                   Jensen_DISEASES     1811
## 111                                      BioPlex_2017     3915
## 112                        GO_Cellular_Component_2017      636
## 113                        GO_Molecular_Function_2017      972
## 114                        GO_Biological_Process_2017     3166
## 115                       GO_Cellular_Component_2017b      816
## 116                       GO_Molecular_Function_2017b     3271
## 117                       GO_Biological_Process_2017b    10125
## 118                                    ARCHS4_Tissues      108
## 119                                 ARCHS4_Cell-lines      125
## 120                                  ARCHS4_IDG_Coexp      352
## 121                              ARCHS4_Kinases_Coexp      498
## 122                                  ARCHS4_TFs_Coexp     1724
## 123                           SysMyo_Muscle_Gene_Sets     1135
## 124                                   miRTarBase_2017     3240
## 125                          TargetScan_microRNA_2017      683
## 126              Enrichr_Libraries_Most_Popular_Genes      121
## 127           Enrichr_Submissions_TF-Gene_Coocurrence     1722
## 128        Data_Acquisition_Method_Most_Popular_Genes       12
## 129                                            DSigDB     4026
## 130                        GO_Biological_Process_2018     5103
## 131                        GO_Cellular_Component_2018      446
## 132                        GO_Molecular_Function_2018     1151
## 133           TF_Perturbations_Followed_by_Expression     1958
##     geneCoverage genesPerTerm
## 1          13362          275
## 2          27884         1284
## 3           6002           77
## 4          47172         1370
## 5          47107          509
## 6          21493         3713
## 7           1295           18
## 8           3185           73
## 9           2854           34
## 10         15057          300
## 11          4128           48
## 12         34061          641
## 13          7504          155
## 14         16399          247
## 15         12753           57
## 16         23726          127
## 17         32740           85
## 18         13373          258
## 19         19270          388
## 20         13236           82
## 21         14264           58
## 22          3096           31
## 23         22288         4368
## 24          4533           37
## 25         10231          158
## 26          2741            5
## 27          5655          342
## 28         10406          715
## 29         10493          200
## 30         11251          100
## 31          8695          100
## 32          1759           25
## 33          2178           89
## 34           851           15
## 35         10061          106
## 36         11250          166
## 37         15406          300
## 38         17711          300
## 39         17576          300
## 40         15797          176
## 41         12232          343
## 42         13572          301
## 43          6454          301
## 44          3723           47
## 45          7588           35
## 46          7682           78
## 47          7324          172
## 48          8469          122
## 49         13121          305
## 50         26382         1811
## 51         29065         2123
## 52           280            9
## 53         13877          304
## 54         15852          912
## 55          4320          129
## 56          4271          128
## 57         10496          201
## 58          1678           21
## 59           756           12
## 60          3800           48
## 61          2541           39
## 62          1918           39
## 63          5863           51
## 64          6768           47
## 65         25651          807
## 66         19129         1594
## 67         23939          293
## 68         23561          307
## 69         23877          302
## 70         15886            9
## 71         24350          299
## 72          3102           25
## 73         31132          298
## 74         30832          302
## 75         48230         1429
## 76          5613           36
## 77          9559           73
## 78          9448           63
## 79         16725         1443
## 80         19249         1443
## 81         15090          282
## 82         16129          292
## 83         15309          308
## 84         15103          318
## 85         15022          290
## 86         15676          310
## 87         15854          279
## 88         15015          321
## 89          3788          159
## 90          3357          153
## 91         12668          300
## 92         12638          300
## 93          8973           64
## 94          7010           87
## 95          5966           51
## 96         15562          887
## 97         17850          300
## 98         17660          300
## 99          1348           19
## 100          934           13
## 101         2541           39
## 102         2041           42
## 103         5209          300
## 104        49238         1550
## 105         2243           19
## 106        19586          545
## 107        22440          505
## 108         8184           24
## 109        18329          161
## 110        15755           28
## 111        10271           22
## 112        10427           38
## 113        10601           25
## 114        13822           21
## 115         8002          143
## 116        10089           45
## 117        13247           49
## 118        21809         2316
## 119        23601         2395
## 120        20883          299
## 121        19612          299
## 122        25983          299
## 123        19500          137
## 124        14893          128
## 125        17598         1208
## 126         5902          109
## 127        12486          299
## 128         1073          100
## 129        19513          117
## 130        14433           36
## 131         8655           61
## 132        11459           39
## 133        19741          270
##                                                                          link
## 1                    http://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/
## 2                                    http://jaspar.genereg.net/html/DOWNLOAD/
## 3                                                                            
## 4                              http://amp.pharm.mssm.edu/lib/cheadownload.jsp
## 5                                            http://www.ncbi.nlm.nih.gov/geo/
## 6                                http://genome.ucsc.edu/ENCODE/downloads.html
## 7                         https://cgap.nci.nih.gov/Pathways/BioCarta_Pathways
## 8                                 http://www.reactome.org/download/index.html
## 9                     http://www.wikipathways.org/index.php/Download_Pathways
## 10                                           http://www.ncbi.nlm.nih.gov/geo/
## 11                                          http://www.kegg.jp/kegg/download/
## 12                                           http://www.ncbi.nlm.nih.gov/geo/
## 13  http://www.targetscan.org/cgi-bin/targetscan/data_download.cgi?db=vert_61
## 14                                              http://amp.pharm.mssm.edu/X2K
## 15                 http://www.geneontology.org/GO.downloads.annotations.shtml
## 16                             http://genesigdb.org/genesigdb/downloadall.jsp
## 17                   http://software.broadinstitute.org/gsea/msigdb/index.jsp
## 18                                               http://biogps.org/downloads/
## 19                                               http://biogps.org/downloads/
## 20                 http://www.geneontology.org/GO.downloads.annotations.shtml
## 21                 http://www.geneontology.org/GO.downloads.annotations.shtml
## 22                                   http://www.human-phenotype-ontology.org/
## 23                                         http://www.roadmapepigenomics.org/
## 24                           http://amp.pharm.mssm.edu/lib/keacommandline.jsp
## 25                                      https://www.nursa.org/nursa/index.jsf
## 26                        http://mips.helmholtz-muenchen.de/genre/proj/corum/
## 27                           http://amp.pharm.mssm.edu/lib/keacommandline.jsp
## 28                                            http://www.informatics.jax.org/
## 29                                            http://www.informatics.jax.org/
## 30                                        http://www.broadinstitute.org/cmap/
## 31                                        http://www.broadinstitute.org/cmap/
## 32                                              http://www.omim.org/downloads
## 33                                              http://www.omim.org/downloads
## 34                                  http://mint.bio.uniroma2.it/download.html
## 35                  http://www.broadinstitute.org/gsea/msigdb/collections.jsp
## 36                  http://www.broadinstitute.org/gsea/msigdb/collections.jsp
## 37                                           http://www.ncbi.nlm.nih.gov/geo/
## 38                                           http://www.ncbi.nlm.nih.gov/geo/
## 39                                           http://www.ncbi.nlm.nih.gov/geo/
## 40                             https://portals.broadinstitute.org/ccle/home\n
## 41                                               http://biogps.org/downloads/
## 42                                              https://www.proteomicsdb.org/
## 43                                  http://www.humanproteomemap.org/index.php
## 44                                               http://www.hmdb.ca/downloads
## 45                                ftp://ftp.ebi.ac.uk/pub/databases/interpro/
## 46                 http://www.geneontology.org/GO.downloads.annotations.shtml
## 47                 http://www.geneontology.org/GO.downloads.annotations.shtml
## 48                 http://www.geneontology.org/GO.downloads.annotations.shtml
## 49                                                  http://www.brain-map.org/
## 50                               http://genome.ucsc.edu/ENCODE/downloads.html
## 51                               http://genome.ucsc.edu/ENCODE/downloads.html
## 52                                            http://www.koehn.embl.de/depod/
## 53                                                  http://www.brain-map.org/
## 54                               http://genome.ucsc.edu/ENCODE/downloads.html
## 55                                     http://www.broadinstitute.org/achilles
## 56                                     http://www.broadinstitute.org/achilles
## 57                                            http://www.informatics.jax.org/
## 58                        https://cgap.nci.nih.gov/Pathways/BioCarta_Pathways
## 59                                                       http://humancyc.org/
## 60                                          http://www.kegg.jp/kegg/download/
## 61                                                    http://pid.nci.nih.gov/
## 62                                                  http://www.pantherdb.org/
## 63                    http://www.wikipathways.org/index.php/Download_Pathways
## 64                                http://www.reactome.org/download/index.html
## 65                                           http://www.maayanlab.net/ESCAPE/
## 66                                     http://www.ncbi.nlm.nih.gov/homologene
## 67                                           http://www.ncbi.nlm.nih.gov/geo/
## 68                                           http://www.ncbi.nlm.nih.gov/geo/
## 69                                           http://www.ncbi.nlm.nih.gov/geo/
## 70                                    https://grants.nih.gov/grants/oer.htm\n
## 71                                           http://www.ncbi.nlm.nih.gov/geo/
## 72                                          http://amp.pharm.mssm.edu/Enrichr
## 73                                           http://www.ncbi.nlm.nih.gov/geo/
## 74                                           http://www.ncbi.nlm.nih.gov/geo/
## 75                                          http://amp.pharm.mssm.edu/Enrichr
## 76                                            http://www.ncbi.nlm.nih.gov/gap
## 77                                                           https://clue.io/
## 78                                                           https://clue.io/
## 79                                                 http://www.gtexportal.org/
## 80                                                 http://www.gtexportal.org/
## 81                                           http://www.ncbi.nlm.nih.gov/geo/
## 82                                           http://www.ncbi.nlm.nih.gov/geo/
## 83                                           http://www.ncbi.nlm.nih.gov/geo/
## 84                                           http://www.ncbi.nlm.nih.gov/geo/
## 85                                           http://www.ncbi.nlm.nih.gov/geo/
## 86                                           http://www.ncbi.nlm.nih.gov/geo/
## 87                                           http://www.ncbi.nlm.nih.gov/geo/
## 88                                           http://www.ncbi.nlm.nih.gov/geo/
## 89                                                           https://clue.io/
## 90                                                           https://clue.io/
## 91                                                           https://clue.io/
## 92                                                           https://clue.io/
## 93                                http://www.reactome.org/download/index.html
## 94                                          http://www.kegg.jp/kegg/download/
## 95                    http://www.wikipathways.org/index.php/Download_Pathways
## 96                                                                           
## 97                                           http://www.ncbi.nlm.nih.gov/geo/
## 98                                           http://www.ncbi.nlm.nih.gov/geo/
## 99                         http://cgap.nci.nih.gov/Pathways/BioCarta_Pathways
## 100                                                      http://humancyc.org/
## 101                                                   http://pid.nci.nih.gov/
## 102                                         http://www.pantherdb.org/pathway/
## 103                                     https://ntp.niehs.nih.gov/drugmatrix/
## 104                                         http://amp.pharm.mssm.edu/Enrichr
## 105                                              http://proteincomplexes.org/
## 106                                             http://tissues.jensenlab.org/
## 107                                          http://www.ncbi.nlm.nih.gov/geo/
## 108                                           http://www.informatics.jax.org/
## 109                                        http://compartments.jensenlab.org/
## 110                                            http://diseases.jensenlab.org/
## 111                                           http://bioplex.hms.harvard.edu/
## 112                                              http://www.geneontology.org/
## 113                                              http://www.geneontology.org/
## 114                                              http://www.geneontology.org/
## 115                                              http://www.geneontology.org/
## 116                                              http://www.geneontology.org/
## 117                                              http://www.geneontology.org/
## 118                                          http://amp.pharm.mssm.edu/archs4
## 119                                          http://amp.pharm.mssm.edu/archs4
## 120                                          http://amp.pharm.mssm.edu/archs4
## 121                                          http://amp.pharm.mssm.edu/archs4
## 122                                          http://amp.pharm.mssm.edu/archs4
## 123                                               http://sys-myo.rhcloud.com/
## 124                                        http://mirtarbase.mbc.nctu.edu.tw/
## 125                                                http://www.targetscan.org/
## 126                                         http://amp.pharm.mssm.edu/Enrichr
## 127                                         http://amp.pharm.mssm.edu/Enrichr
## 128                                         http://amp.pharm.mssm.edu/Enrichr
## 129                             http://tanlab.ucdenver.edu/DSigDB/DSigDBv1.0/
## 130                                              http://www.geneontology.org/
## 131                                              http://www.geneontology.org/
## 132                                              http://www.geneontology.org/
## 133                                          http://www.ncbi.nlm.nih.gov/geo/
#select pathway databases of choice from above
dbs_select<-c("GO_Biological_Process_2018", "KEGG_2016", "Reactome_2016")

Select color module of interest

SelectGeneListA1=read.csv('geneListsModuleSummarycolors.csv',header=T, sep=',')
SelectGeneListA1=SelectGeneListA1$Gene[modulesA1=='black']
#compare for enrichment with our Genes
enriched <- enrichr(SelectGeneListA1, dbs_select)
## Uploading data to Enrichr... Done.
##   Querying GO_Biological_Process_2018... Done.
##   Querying KEGG_2016... Done.
##   Querying Reactome_2016... Done.
## Parsing results... Done.
#GO Biological Process results
SelectGeneListA1_GOBiologicalProcess<-as.data.frame(enriched[["GO_Biological_Process_2018"]])
write.csv(SelectGeneListA1_GOBiologicalProcess,file="SelectGeneListA1_GOBiologicalProcess_black.csv")
GOBiologicalProcessData<-SelectGeneListA1_GOBiologicalProcess[order(-SelectGeneListA1_GOBiologicalProcess$Combined.Score),]
GOBiologicalProcessData<-GOBiologicalProcessData[1:10,]
ggplot(data=GOBiologicalProcessData, aes(x=Term, y=-log10(Adjusted.P.value), fill=Combined.Score))+
  geom_bar(position="stack", stat="identity")+
  labs(title="GOBiologicalProcess Comparison")+
  #theme(axis.text.x=element_text(angle=90,hjust=1)) +
  coord_flip()+
labs(y="-log10(Adjusted.P.value)")+
  scale_fill_gradient(low="grey",high="black")

dev.print(pdf,"SelectGeneListA1_GOBiologicalProcessTop10plot_black.pdf", width=15, height=5)
## quartz_off_screen 
##                 2
#KEEG pathway results
SelectGeneListA1_KEEGpathway<-as.data.frame(enriched[["KEGG_2016"]])
write.csv(SelectGeneListA1_KEEGpathway,file="SelectGeneListA1_KEEGpathway_black.csv")
KEEGpathwayData<-SelectGeneListA1_KEEGpathway[order(-SelectGeneListA1_KEEGpathway$Combined.Score),]
KEEGpathwayData<-KEEGpathwayData[1:10,]
ggplot(data=KEEGpathwayData, aes(x=Term, y=-log10(Adjusted.P.value), fill=Combined.Score))+
  geom_bar(position="stack", stat="identity")+
  labs(title="KEEGpathway Comparison")+
  #theme(axis.text.x=element_text(angle=90,hjust=1)) +
  coord_flip()+
  labs(y="-log10(Adjusted.P.value)")+
  scale_fill_gradient(low="grey",high="black")

dev.print(pdf,"SelectGeneListA1_KEEGpathwayTop10plot_black.pdf", width=10, height=5)
## quartz_off_screen 
##                 2
#Reactome pathway results
SelectGeneListA1_Reactome<-as.data.frame(enriched[["Reactome_2016"]])
write.csv(SelectGeneListA1_Reactome,"SelectGeneListA1_Reactome_black.csv")
ReactomeData<-SelectGeneListA1_Reactome[order(-SelectGeneListA1_Reactome$Combined.Score),]
ReactomeData<-ReactomeData[1:10,]
ggplot(data=ReactomeData, aes(x=Term, y=-log10(Adjusted.P.value), fill=Combined.Score))+
  geom_bar(position="stack", stat="identity")+
  labs(title="Reactome Comparison")+
  #theme(axis.text.x=element_text(angle=90,hjust=1)) +
  coord_flip()+
  labs(y="-log10(Adjusted.P.value)")+
  scale_fill_gradient(low="grey",high="black")

dev.print(pdf,"SelectGeneListA1_ReactomeTop10plot_black.pdf", width=10, height=5)
## quartz_off_screen 
##                 2

hub genes from black module which is aging and disease associated ahd has microglia genes

Step30: Aging association and plot of ‘microglia’ genes enriched in ‘black’ module in aging data A1

#Microglia genes in black module are stored in a variable for plotting. This can be replace with any choice of genes present in a given module.  
black_Microglia_hubgenes=c("TBXAS1","PTPRC","TYROBP","ITGB2","MYO1F","CYBA","LST1","SLC7A7","RNASET2","FCER1G")
summary(black_Microglia_hubgenes)
##    Length     Class      Mode 
##        10 character character
#view metadata and expression data A1
DT::datatable(metadataA1g_coded)
DT::datatable(datExprA1g2)